##// END OF EJS Templates
formatting: re-blacken match.py...
Matt Harbison -
r46681:1f0ed7e6 default
parent child Browse files
Show More
@@ -1,1637 +1,1634 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import copy
10 import copy
11 import itertools
11 import itertools
12 import os
12 import os
13 import re
13 import re
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import open
16 from .pycompat import open
17 from . import (
17 from . import (
18 encoding,
18 encoding,
19 error,
19 error,
20 pathutil,
20 pathutil,
21 policy,
21 policy,
22 pycompat,
22 pycompat,
23 util,
23 util,
24 )
24 )
25 from .utils import stringutil
25 from .utils import stringutil
26
26
# Result of asking the policy module for the Rust 'dirstate' module.
# NOTE(review): presumably a false value when the Rust extensions are not
# available -- confirm against policy.importrust().
rustmod = policy.importrust('dirstate')

# Every pattern kind (the part before ':' in 'kind:pattern') that this
# module knows about.
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)
# Pattern kinds whose pattern is canonicalized against the current working
# directory by _donormalize().
cwdrelativepatternkinds = (b'relpath', b'glob')

# Local alias, used as a decorator by the matcher classes in this module.
propertycache = util.propertycache
47
47
48
48
def _rematcher(regex):
    """Compile ``regex`` with the best available regexp engine.

    Returns a callable that matches a string against the compiled pattern:
    the ``test_match`` method when the compiled object provides one (the
    slightly faster entry point offered by facebook's re2 bindings), and
    the regular ``match`` method otherwise.
    """
    compiled = util.re.compile(regex)
    try:
        matcher = compiled.test_match
    except AttributeError:
        # plain 're' pattern objects have no test_match()
        matcher = compiled.match
    return matcher
58
58
59
59
def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
    """Split ``kindpats`` into fileset matchers and everything else.

    Each ``set:`` pattern is turned into a matcher through
    ``ctx.matchfileset()``.  When ``listsubrepos`` is true, the same
    fileset is also evaluated in every subrepo listed in ``ctx.substate``
    and wrapped in a ``prefixdirmatcher`` for that subrepo path.

    Returns a ``(matchers, other)`` pair where ``other`` holds the
    non-``set`` entries as ``(kind, pat, source)`` tuples in their original
    order.

    Raises error.ProgrammingError if a ``set:`` pattern is given while
    ``ctx`` is None.
    """
    filesetmatchers = []
    remaining = []

    for entry in kindpats:
        kind, pat, source = entry
        if kind != b'set':
            remaining.append((kind, pat, source))
            continue

        if ctx is None:
            raise error.ProgrammingError(
                b"fileset expression with no context"
            )
        filesetmatchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))

        if not listsubrepos:
            continue
        for subpath in ctx.substate:
            submatcher = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
            filesetmatchers.append(
                prefixdirmatcher(subpath, submatcher, badfn=badfn)
            )

    return filesetmatchers, remaining
82
82
83
83
def _expandsubinclude(kindpats, root):
    """Separate the ``subinclude:`` entries of ``kindpats`` from the rest.

    Returns a ``(relmatchers, other)`` pair.  ``relmatchers`` holds one
    ``(prefix, matcherargs)`` tuple per subinclude, where ``matcherargs``
    is ``(newroot, b'', [], [b'include:<path>'])`` and ``prefix`` is the
    directory of the included file canonicalized against ``root`` (with a
    trailing ``/`` unless it is empty).  ``other`` holds the remaining
    ``(kind, pat, source)`` entries.
    """
    relmatchers = []
    remaining = []

    for kind, pat, source in kindpats:
        if kind != b'subinclude':
            remaining.append((kind, pat, source))
            continue

        # The included file is looked up next to the file that named it.
        sourceroot = pathutil.dirname(util.normpath(source))
        path = pathutil.join(sourceroot, util.pconvert(pat))

        newroot = pathutil.dirname(path)
        matcherargs = (newroot, b'', [], [b'include:%s' % path])

        prefix = pathutil.canonpath(root, root, newroot)
        if prefix:
            prefix += b'/'
        relmatchers.append((prefix, matcherargs))

    return relmatchers, remaining
107
107
108
108
def _kindpatsalwaysmatch(kindpats):
    """Tell whether ``kindpats`` matches everything, as e.g. 'relpath:.'
    does.

    That is the case when every entry has an empty pattern and a kind of
    either ``relpath`` or ``glob``.  An empty ``kindpats`` yields True.
    """
    return all(
        pat == b'' and kind in (b'relpath', b'glob')
        for kind, pat, _source in kindpats
    )
117
117
118
118
def _buildkindpatsmatcher(
    matchercls,
    root,
    cwd,
    kindpats,
    ctx=None,
    listsubrepos=False,
    badfn=None,
):
    """Build a single matcher for ``kindpats``.

    ``set:`` patterns are expanded into their own matchers by
    _expandsets(); whatever remains is handed to ``matchercls``.  The
    result is a nevermatcher when nothing was produced, the only matcher
    when exactly one was produced, and a unionmatcher of all of them
    otherwise.
    """
    filesetmatchers, plainpats = _expandsets(
        cwd,
        kindpats,
        ctx=ctx,
        listsubrepos=listsubrepos,
        badfn=badfn,
    )

    combined = []
    if plainpats:
        combined.append(matchercls(root, plainpats, badfn=badfn))
    combined.extend(filesetmatchers)

    if not combined:
        return nevermatcher(badfn=badfn)
    if len(combined) == 1:
        return combined[0]
    return unionmatcher(combined)
146
146
147
147
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    >>> def _match(root, *args, **kwargs):
    ...     return match(util.localpath(root), *args, **kwargs)

    Usually a patternmatcher is returned:
    >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> _match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    cwd = os.path.join(root, util.localpath(cwd))

    if icasefs:
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            """_donormalize(), then fold each pattern to the dirstate's
            case."""
            folded = []
            for kind, pat, source in _donormalize(
                patterns, default, root, cwd, auditor, warn
            ):
                if kind in (b're', b'relre'):
                    # regex can't be normalized
                    folded.append((kind, pat, source))
                    continue

                normed = dsnormalize(pat)
                # Preserve the original to handle a case only rename.
                if pat != normed and pat in dirstate:
                    folded.append((kind, pat, source))
                folded.append((kind, normed, source))
            return folded

    else:
        normalize = _donormalize

    def _includelike(pats):
        """Build the matcher shared by the include and exclude handling."""
        kindpats = normalize(pats, b'glob', root, cwd, auditor, warn)
        return _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )

    if not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        m = alwaysmatcher(badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                cwd,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )

    if include:
        m = intersectmatchers(m, _includelike(include))
    if exclude:
        m = differencematcher(m, _includelike(exclude))
    return m
310
310
311
311
def exact(files, badfn=None):
    """Return an exactmatcher built from ``files``, with ``badfn`` as its
    optional bad() callback."""
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
314
314
315
315
def always(badfn=None):
    """Return an alwaysmatcher, with ``badfn`` as its optional bad()
    callback."""
    matcher = alwaysmatcher(badfn)
    return matcher
318
318
319
319
def never(badfn=None):
    """Return a nevermatcher, with ``badfn`` as its optional bad()
    callback."""
    matcher = nevermatcher(badfn)
    return matcher
322
322
323
323
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose ``bad`` attribute is
    ``badfn``.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
331
331
332
332
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    """Turn each 'kind:pat' of ``patterns`` into a ``(kind, pat, source)``
    tuple with a normalized, rooted pattern.

    ``listfile:``/``listfile0:`` and ``include:`` entries are expanded
    recursively into the patterns they contain; for those, ``source`` names
    the file the pattern came from.  For every other entry ``source`` is
    ``b''``.

    Raises error.Abort when a list file cannot be read, or when expanding an
    include file aborts.  An include file that raises IOError is skipped,
    after calling ``warn`` if one was given.
    """
    normalized = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
            pat = util.normpath(pat)
        elif kind in (b'listfile', b'listfile0'):
            try:
                content = util.readfile(pat)
                if kind == b'listfile0':
                    entries = content.split(b'\0')
                else:
                    entries = content.splitlines()
                entries = [entry for entry in entries if entry]
            except EnvironmentError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            # The list file itself is recorded as the source of each entry.
            normalized.extend(
                (k, p, pat)
                for k, p, _source in _donormalize(
                    entries, default, root, cwd, auditor, warn
                )
            )
            continue
        elif kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(
                    includepats, default, root, cwd, auditor, warn
                ):
                    # Keep the innermost source if the nested call set one.
                    normalized.append((k, p, source or pat))
            except error.Abort as inst:
                raise error.Abort(
                    b'%s: %s'
                    % (
                        pat,
                        inst.message,
                    )  # pytype: disable=unsupported-operands
                )
            except IOError as inst:
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
            continue
        # else: re or relre - which cannot be normalized
        normalized.append((kind, pat, b''))
    return normalized
383
383
384
384
class basematcher(object):
    """Base class of the matchers in this module.

    On its own it matches nothing (matchfn() returns False) and lists no
    files; subclasses override the methods below.
    """

    def __init__(self, badfn=None):
        # Only shadow the bad() method when a callback was actually given.
        if badfn is None:
            return
        self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        """Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message."""

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        """Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots"""
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        """Returns True if f is in .files()."""
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        """
        return True

    def visitchildrenset(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        """
        return b'this'

    def always(self):
        """Matcher will match everything and .files() will be empty --
        optimization might be possible."""
        return False

    def isexact(self):
        """Matcher will match exactly the list of files in .files() --
        optimization might be possible."""
        return False

    def prefix(self):
        """Matcher will match the paths in .files() recursively --
        optimization might be possible."""
        return False

    def anypats(self):
        """None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult."""
        return not (self.always() or self.isexact() or self.prefix())
499
499
500
500
class alwaysmatcher(basematcher):
    """Matcher that accepts every file and asks for every directory to be
    visited."""

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<alwaysmatcher>'

    def always(self):
        return True

    def matchfn(self, f):
        # every file name matches
        return True

    def visitdir(self, dir):
        # the directory and all of its subdirectories are to be visited
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'
521
521
522
522
class nevermatcher(basematcher):
    """Matcher that rejects every file and asks for no directory to be
    visited."""

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<nevermatcher>'

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # nothing below any directory can match
        return False

    def visitchildrenset(self, dir):
        return set()
548
548
549
549
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function"""

    def __init__(self, predfn, predrepr=None, badfn=None):
        """Wrap `predfn` so that it is used as this matcher's `matchfn`.

        `predrepr` is only used to build the repr; when it yields nothing
        the repr falls back to the byte repr of `predfn` itself.
        """
        super(predicatematcher, self).__init__(badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        s = stringutil.buildrepr(self._predrepr) or pycompat.byterepr(
            self.matchfn
        )
        # NOTE(review): 'predicatenmatcher' looks like a typo for
        # 'predicatematcher'. It is kept byte-for-byte because the repr is
        # runtime output that callers or tests may compare against.
        return b'<predicatenmatcher pred=%s>' % s
564
564
565
565
def path_or_parents_in_set(path, prefix_set):
    """Returns True if `path` (or any parent of `path`) is in `prefix_set`.

    `path` and the members of `prefix_set` are '/'-separated byte strings;
    b'' in `prefix_set` stands for the root and therefore matches any path.
    """
    l = len(prefix_set)
    if l == 0:
        return False
    if path in prefix_set:
        return True
    # If there's more than 5 paths in prefix_set, it's *probably* quicker to
    # "walk up" the directory hierarchy instead, with the assumption that most
    # directory hierarchies are relatively shallow and hash lookup is cheap.
    if l > 5:
        return any(
            parentdir in prefix_set for parentdir in pathutil.finddirs(path)
        )

    # FIXME: Ideally we'd never get to this point if this is the case - we'd
    # recognize ourselves as an 'always' matcher and skip this.
    if b'' in prefix_set:
        return True

    # `pf` is a parent of `path` only if `path` continues with a '/' right
    # after it ('a' is a parent of 'a/b' but not of 'ab/c'). Comparing a
    # one-byte slice works identically on Python 2 and 3 (indexing bytes
    # yields an int on Python 3) and can never raise IndexError.
    return any(
        path.startswith(pf) and path[len(pf) : len(pf) + 1] == b'/'
        for pf in prefix_set
    )
594
594
595
595
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        # The b'$' suffix is handed to _buildmatch together with the
        # patterns; includematcher passes b'(?:/|$)' in the same position.
        self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)

    @propertycache
    def _dirs(self):
        # Set of directories derived from the explicit files, computed once.
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        """Return b'all', True or False for whether `dir` should be visited.

        b'all' is only returned for a prefix matcher whose explicit files
        contain `dir` itself.
        """
        if self._prefix and dir in self._fileset:
            return b'all'
        return dir in self._dirs or path_or_parents_in_set(dir, self._fileset)

    def visitchildrenset(self, dir):
        """Translate the visitdir() answer into a visitchildrenset value."""
        ret = self.visitdir(dir)
        if ret is True:
            return b'this'
        elif not ret:
            return set()
        # visitdir() only returns True, a falsy value or b'all'.
        assert ret == b'all'
        return b'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
663
660
664
661
665 # This is basically a reimplementation of pathutil.dirs that stores the
662 # This is basically a reimplementation of pathutil.dirs that stores the
666 # children instead of just a count of them, plus a small optional optimization
663 # children instead of just a count of them, plus a small optional optimization
667 # to avoid some directories we don't need.
664 # to avoid some directories we don't need.
668 class _dirchildren(object):
665 class _dirchildren(object):
669 def __init__(self, paths, onlyinclude=None):
666 def __init__(self, paths, onlyinclude=None):
670 self._dirs = {}
667 self._dirs = {}
671 self._onlyinclude = onlyinclude or []
668 self._onlyinclude = onlyinclude or []
672 addpath = self.addpath
669 addpath = self.addpath
673 for f in paths:
670 for f in paths:
674 addpath(f)
671 addpath(f)
675
672
676 def addpath(self, path):
673 def addpath(self, path):
677 if path == b'':
674 if path == b'':
678 return
675 return
679 dirs = self._dirs
676 dirs = self._dirs
680 findsplitdirs = _dirchildren._findsplitdirs
677 findsplitdirs = _dirchildren._findsplitdirs
681 for d, b in findsplitdirs(path):
678 for d, b in findsplitdirs(path):
682 if d not in self._onlyinclude:
679 if d not in self._onlyinclude:
683 continue
680 continue
684 dirs.setdefault(d, set()).add(b)
681 dirs.setdefault(d, set()).add(b)
685
682
686 @staticmethod
683 @staticmethod
687 def _findsplitdirs(path):
684 def _findsplitdirs(path):
688 # yields (dirname, basename) tuples, walking back to the root. This is
685 # yields (dirname, basename) tuples, walking back to the root. This is
689 # very similar to pathutil.finddirs, except:
686 # very similar to pathutil.finddirs, except:
690 # - produces a (dirname, basename) tuple, not just 'dirname'
687 # - produces a (dirname, basename) tuple, not just 'dirname'
691 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
688 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
692 # slash.
689 # slash.
693 oldpos = len(path)
690 oldpos = len(path)
694 pos = path.rfind(b'/')
691 pos = path.rfind(b'/')
695 while pos != -1:
692 while pos != -1:
696 yield path[:pos], path[pos + 1 : oldpos]
693 yield path[:pos], path[pos + 1 : oldpos]
697 oldpos = pos
694 oldpos = pos
698 pos = path.rfind(b'/', 0, pos)
695 pos = path.rfind(b'/', 0, pos)
699 yield b'', path[:oldpos]
696 yield b'', path[:oldpos]
700
697
701 def get(self, path):
698 def get(self, path):
702 return self._dirs.get(path, set())
699 return self._dirs.get(path, set())
703
700
704
701
class includematcher(basematcher):
    """Matches a set of include (kind, pat, source) patterns against 'root'.

    Besides the match function, the directories needed to answer visitdir()
    and visitchildrenset() are precomputed from the patterns.
    """

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)
        if rustmod is not None:
            # We need to pass the patterns to Rust because they can contain
            # patterns from the user interface
            self._kindpats = kindpats
        self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        """Return b'all', True or False for whether `dir` should be visited."""
        if self._prefix and dir in self._roots:
            return b'all'
        return (
            dir in self._dirs
            or dir in self._parents
            or path_or_parents_in_set(dir, self._roots)
        )

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        return _dirchildren(
            itertools.chain(self._dirs, self._roots, self._parents),
            onlyinclude=self._parents,
        )

    def visitchildrenset(self, dir):
        """Return b'all', b'this' or the set of children of `dir` to visit."""
        if self._prefix and dir in self._roots:
            return b'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if (
            b'' in self._roots
            or dir in self._dirs
            or path_or_parents_in_set(dir, self._roots)
        ):
            return b'this'

        if dir in self._parents:
            # `or set()` normalises a missing/empty answer to an empty set.
            return self._allparentschildren.get(dir) or set()
        return set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
764
761
765
762
class exactmatcher(basematcher):
    r"""Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    """

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # Store a list; a list argument is kept as-is (not copied).
        if isinstance(files, list):
            self._files = files
        else:
            self._files = list(files)

    # A file matches exactly when basematcher.exact() says so.
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        """Return the immediate children of `dir` that lead to a file."""
        if not self._fileset or dir not in self._dirs:
            return set()

        # `-` binds tighter than `|`; the parentheses only make the existing
        # grouping explicit (the root b'' is dropped from the directories).
        candidates = self._fileset | (self._dirs - {b''})
        if dir != b'':
            d = dir + b'/'
            candidates = {c[len(d) :] for c in candidates if c.startswith(d)}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        ret = {c for c in candidates if b'/' not in c}
        # We really do not expect ret to be empty, since that would imply that
        # there's something in _dirs that didn't have a file in _fileset.
        assert ret
        return ret

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
829
826
830
827
class differencematcher(basematcher):
    """Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        """Return b'all', True or False for whether `dir` should be visited."""
        # Ask m2 once and reuse the answer for both checks below.
        m2_visit = self._m2.visitdir(dir)
        if m2_visit == b'all':
            # m2 matches everything under dir, so nothing can be left over.
            return False
        elif not m2_visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        # m2 matches some of dir: never report 'all', only whether to visit.
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
898
895
899
896
def intersectmatchers(m1, m2):
    """Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.

    If either argument is None the other one is returned unchanged. If one
    matcher always matches, a shallow copy of the other is returned instead
    of building an intersectionmatcher.
    """
    if m1 is None or m2 is None:
        return m1 or m2
    if m1.always():
        m = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        m.bad = m1.bad
        m.traversedir = m1.traversedir
        return m
    if m2.always():
        # m1 already carries its own bad/traversedir; a plain copy suffices.
        return copy.copy(m1)
    return intersectionmatcher(m1, m2)
918
915
919
916
class intersectionmatcher(basematcher):
    """Matches a file only if both wrapped matchers match it.

    bad and traversedir are taken from the first matcher.
    """

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if self.isexact():
            m1, m2 = self._m1, self._m2
            # Make sure m1 is the exact one so its files() can be filtered.
            if not m1.isexact():
                m1, m2 = m2, m1
            return [f for f in m1.files() if m2(f)]
        # If neither m1 nor m2 is an exact matcher, we can't easily intersect
        # the set of files, because their files() are not always files. For
        # example, if intersecting a matcher "-I glob:foo.txt" with matcher of
        # "path:dir2", we don't want to remove "dir2" from the set.
        return self._m1.files() + self._m2.files()

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        visit1 = self._m1.visitdir(dir)
        if visit1 == b'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(visit1 and self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side leaves the other side's answer as the result.
        if m1_set == b'all':
            return m2_set
        elif m2_set == b'all':
            return m1_set

        if m1_set == b'this' or m2_set == b'this':
            return b'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set.intersection(m2_set)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
979
976
980
977
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files that live below `path`, with
        # the "path/" prefix stripped.
        prefix = path + b"/"
        self._files = [
            f[len(prefix) :] for f in matcher._files if f.startswith(prefix)
        ]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        # Report the failure to the wrapped matcher using the full path.
        self._matcher.bad(self._path + b"/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + b"/" + f)

    def visitdir(self, dir):
        # b'' is this matcher's root, i.e. the subdirectory itself.
        if dir == b'':
            dir = self._path
        else:
            dir = self._path + b"/" + dir
        return self._matcher.visitdir(dir)

    def visitchildrenset(self, dir):
        if dir == b'':
            dir = self._path
        else:
            dir = self._path + b"/" + dir
        return self._matcher.visitchildrenset(dir)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return b'<subdirmatcher path=%r, matcher=%r>' % (
            self._path,
            self._matcher,
        )
1061
1058
1062
1059
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        """Wrap ``matcher`` so that it is addressed below ``path``.

        Raises error.ProgrammingError when ``path`` is empty.
        """
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._matcher = matcher
        self._path = path
        # Kept with the trailing slash so startswith() only accepts real
        # path components ('d/e/' does not match 'd/ef').
        self._pathprefix = path + b'/'

    @propertycache
    def _files(self):
        """The wrapped matcher's files, each with the prefix prepended."""
        prefix = self._pathprefix
        return [prefix + f for f in self._matcher._files]

    def matchfn(self, f):
        """Match ``f`` only if it lies below the prefix directory."""
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix) :])
        return False

    @propertycache
    def _pathdirs(self):
        """Set built from pathutil.finddirs() applied to the prefix path."""
        # presumably the ancestor directories of the prefix path (the
        # doctest above visits b'd' for prefix b'd/e') -- confirm against
        # pathutil.finddirs
        return set(pathutil.finddirs(self._path))

    def visitdir(self, dir):
        """Translate ``dir`` for the wrapped matcher, or test the ancestors."""
        if dir == self._path:
            inner = b''
        elif dir.startswith(self._pathprefix):
            inner = dir[len(self._pathprefix) :]
        else:
            # Outside the prefix: only directories recorded in _pathdirs
            # are worth visiting.
            return dir in self._pathdirs
        return self._matcher.visitdir(inner)

    def visitchildrenset(self, dir):
        """Translate ``dir`` for the wrapped matcher, or test the ancestors."""
        if dir == self._path:
            inner = b''
        elif dir.startswith(self._pathprefix):
            inner = dir[len(self._pathprefix) :]
        elif dir in self._pathdirs:
            return b'this'
        else:
            return set()
        return self._matcher.visitchildrenset(inner)

    def isexact(self):
        """Forward to the wrapped matcher."""
        return self._matcher.isexact()

    def prefix(self):
        """Forward to the wrapped matcher."""
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        """Describe this matcher by its prefix path and wrapped matcher."""
        details = (pycompat.bytestr(self._path), self._matcher)
        return b'<prefixdirmatcher path=%r, matcher=%r>' % details
1144
1141
1145
1142
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (bad, traversedir) are taken from the first
    matcher.
    """

    def __init__(self, matchers):
        """Combine ``matchers``; the sequence must not be empty."""
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self._matchers = matchers
        self.traversedir = first.traversedir

    def matchfn(self, f):
        """Return True as soon as any of the matchers accepts ``f``."""
        return any(match(f) for match in self._matchers)

    def visitdir(self, dir):
        """OR together the answers of all matchers.

        An answer of b'all' from any matcher is returned immediately.
        """
        combined = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == b'all':
                return answer
            combined |= answer
        return combined

    def visitchildrenset(self, dir):
        """Merge the children sets reported by all matchers.

        b'all' from any matcher wins outright, b'this' from any matcher
        wins over explicit sets, otherwise the union of the sets is
        returned.
        """
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == b'all':
                return answer
            if sawthis or answer == b'this':
                # Keep looping: a later matcher might still answer 'all'.
                sawthis = True
                continue
            assert isinstance(answer, set)
            children |= answer
        return b'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        """Describe this matcher by the matchers it combines."""
        return b'<unionmatcher matchers=%r>' % self._matchers
1196
1193
1197
1194
def patkind(pattern, default=None):
    r"""If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise ``default`` is returned.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    """
    kind, _pat = _patsplit(pattern, default)
    return kind
1212
1209
1213
1210
1214 def _patsplit(pattern, default):
1211 def _patsplit(pattern, default):
1215 """Split a string into the optional pattern kind prefix and the actual
1212 """Split a string into the optional pattern kind prefix and the actual
1216 pattern."""
1213 pattern."""
1217 if b':' in pattern:
1214 if b':' in pattern:
1218 kind, pat = pattern.split(b':', 1)
1215 kind, pat = pattern.split(b':', 1)
1219 if kind in allpatternkinds:
1216 if kind in allpatternkinds:
1220 return kind, pat
1217 return kind, pat
1221 return default, pattern
1218 return default, pattern
1222
1219
1223
1220
def _globre(pat):
    r"""Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    """
    escape = util.stringutil.regexbytesescapemap.get
    length = len(pat)
    pos = 0
    depth = 0  # number of '{' groups currently open
    out = []

    while pos < length:
        ch = pat[pos : pos + 1]
        pos += 1
        if ch == b'*':
            if pat[pos : pos + 1] != b'*':
                # a single star never crosses a directory separator
                out.append(b'[^/]*')
                continue
            pos += 1
            if pat[pos : pos + 1] == b'/':
                # '**/' also matches zero directories
                pos += 1
                out.append(b'(?:.*/)?')
            else:
                out.append(b'.*')
        elif ch == b'?':
            out.append(b'.')
        elif ch == b'[':
            close = pos
            # a leading '!' or ']' belongs to the class body
            if close < length and pat[close : close + 1] in b'!]':
                close += 1
            while close < length and pat[close : close + 1] != b']':
                close += 1
            if close >= length:
                # unterminated class: treat the bracket literally
                out.append(b'\\[')
            else:
                stuff = pat[pos:close].replace(b'\\', b'\\\\')
                pos = close + 1
                first = stuff[0:1]
                if first == b'!':
                    stuff = b'^' + stuff[1:]
                elif first == b'^':
                    stuff = b'\\' + stuff
                out.append(b'[' + stuff + b']')
        elif ch == b'{':
            depth += 1
            out.append(b'(?:')
        elif ch == b'}' and depth:
            out.append(b')')
            depth -= 1
        elif ch == b',' and depth:
            out.append(b'|')
        elif ch == b'\\' and pos < length:
            # backslash escapes the following character
            nxt = pat[pos : pos + 1]
            pos += 1
            out.append(escape(nxt, nxt))
        else:
            # ordinary characters, a trailing backslash, and '}' or ','
            # outside of any group
            out.append(escape(ch, ch))
    return b''.join(out)
1306
1303
1307
1304
1308 def _regex(kind, pat, globsuffix):
1305 def _regex(kind, pat, globsuffix):
1309 """Convert a (normalized) pattern of any kind into a
1306 """Convert a (normalized) pattern of any kind into a
1310 regular expression.
1307 regular expression.
1311 globsuffix is appended to the regexp of globs."""
1308 globsuffix is appended to the regexp of globs."""
1312 if not pat and kind in (b'glob', b'relpath'):
1309 if not pat and kind in (b'glob', b'relpath'):
1313 return b''
1310 return b''
1314 if kind == b're':
1311 if kind == b're':
1315 return pat
1312 return pat
1316 if kind in (b'path', b'relpath'):
1313 if kind in (b'path', b'relpath'):
1317 if pat == b'.':
1314 if pat == b'.':
1318 return b''
1315 return b''
1319 return util.stringutil.reescape(pat) + b'(?:/|$)'
1316 return util.stringutil.reescape(pat) + b'(?:/|$)'
1320 if kind == b'rootfilesin':
1317 if kind == b'rootfilesin':
1321 if pat == b'.':
1318 if pat == b'.':
1322 escaped = b''
1319 escaped = b''
1323 else:
1320 else:
1324 # Pattern is a directory name.
1321 # Pattern is a directory name.
1325 escaped = util.stringutil.reescape(pat) + b'/'
1322 escaped = util.stringutil.reescape(pat) + b'/'
1326 # Anything after the pattern must be a non-directory.
1323 # Anything after the pattern must be a non-directory.
1327 return escaped + b'[^/]+$'
1324 return escaped + b'[^/]+$'
1328 if kind == b'relglob':
1325 if kind == b'relglob':
1329 globre = _globre(pat)
1326 globre = _globre(pat)
1330 if globre.startswith(b'[^/]*'):
1327 if globre.startswith(b'[^/]*'):
1331 # When pat has the form *XYZ (common), make the returned regex more
1328 # When pat has the form *XYZ (common), make the returned regex more
1332 # legible by returning the regex for **XYZ instead of **/*XYZ.
1329 # legible by returning the regex for **XYZ instead of **/*XYZ.
1333 return b'.*' + globre[len(b'[^/]*') :] + globsuffix
1330 return b'.*' + globre[len(b'[^/]*') :] + globsuffix
1334 return b'(?:|.*/)' + globre + globsuffix
1331 return b'(?:|.*/)' + globre + globsuffix
1335 if kind == b'relre':
1332 if kind == b'relre':
1336 if pat.startswith(b'^'):
1333 if pat.startswith(b'^'):
1337 return pat
1334 return pat
1338 return b'.*' + pat
1335 return b'.*' + pat
1339 if kind in (b'glob', b'rootglob'):
1336 if kind in (b'glob', b'rootglob'):
1340 return _globre(pat) + globsuffix
1337 return _globre(pat) + globsuffix
1341 raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1338 raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1342
1339
1343
1340
def _buildmatch(kindpats, globsuffix, root):
    """Return regexp string and a matcher function for kindpats.

    globsuffix is appended to the regexp of globs.
    """
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers for subincludes are built on first use and then reused
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                if prefix not in submatchers:
                    submatchers[prefix] = match(*matcherargs)
                if submatchers[prefix](f[len(prefix) :]):
                    return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            # Only "files directly in directory" patterns: a set lookup on
            # the parent directory replaces the regular expression.
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                head, slash, _tail = f.rpartition(b'/')
                return (head if slash else b'.') in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]

    def matchany(f):
        return any(mf(f) for mf in matchfuncs)

    return regex, matchany
1390
1387
1391
1388
# Size limit, in bytes, applied by _buildregexmatch(): a single pattern whose
# regex is longer than this aborts, and the list of regexes is split into
# several groups so that no joined group grows past it.
MAX_RE_SIZE = 20000
1393
1390
1394
1391
1395 def _joinregexes(regexps):
1392 def _joinregexes(regexps):
1396 """gather multiple regular expressions into a single one"""
1393 """gather multiple regular expressions into a single one"""
1397 return b'|'.join(regexps)
1394 return b'|'.join(regexps)
1398
1395
1399
1396
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    The returned regexp string is always the full alternation; when it
    would exceed MAX_RE_SIZE the matching itself is done by several smaller
    compiled groups. Raises error.Abort for an over-long or invalid pattern.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        regexps = [
            _regex(kind, pat, globsuffix) for (kind, pat, src) in kindpats
        ]
        fullregexp = _joinregexes(regexps)

        chunks = []
        chunkstart = 0
        chunksize = 0
        for idx, piece in enumerate(regexps):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                raise error.Abort(
                    _(b"matcher pattern is too long (%d bytes)") % piecesize
                )
            if chunksize + piecesize > MAX_RE_SIZE:
                # close the current group just before this piece
                chunks.append(_joinregexes(regexps[chunkstart:idx]))
                chunkstart = idx
                chunksize = 0
            # presumably the extra byte stands for the b'|' separator that
            # _joinregexes puts between pieces
            chunksize += piecesize + 1

        if chunkstart:
            # at least one split happened: flush the trailing group
            chunks.append(_joinregexes(regexps[chunkstart:]))
            allmatchers = [_rematcher(chunk) for chunk in chunks]

            def func(s):
                return any(m(s) for m in allmatchers)

        else:
            matcher = _rematcher(fullregexp)

            def func(s):
                return bool(matcher(s))

        return fullregexp, func
    except re.error:
        # Compile the patterns one by one to name the offending one.
        for kind, pat, src in kindpats:
            try:
                _rematcher(_regex(kind, pat, globsuffix))
            except re.error:
                if not src:
                    raise error.Abort(
                        _(b"invalid pattern (%s): %s") % (kind, pat)
                    )
                raise error.Abort(
                    _(b"%s: invalid pattern (%s): %s") % (src, kind, pat)
                )
        raise error.Abort(_(b"invalid pattern"))
1452
1449
1453
1450
1454 def _patternrootsanddirs(kindpats):
1451 def _patternrootsanddirs(kindpats):
1455 """Returns roots and directories corresponding to each pattern.
1452 """Returns roots and directories corresponding to each pattern.
1456
1453
1457 This calculates the roots and directories exactly matching the patterns and
1454 This calculates the roots and directories exactly matching the patterns and
1458 returns a tuple of (roots, dirs) for each. It does not return other
1455 returns a tuple of (roots, dirs) for each. It does not return other
1459 directories which may also need to be considered, like the parent
1456 directories which may also need to be considered, like the parent
1460 directories.
1457 directories.
1461 """
1458 """
1462 r = []
1459 r = []
1463 d = []
1460 d = []
1464 for kind, pat, source in kindpats:
1461 for kind, pat, source in kindpats:
1465 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1462 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1466 root = []
1463 root = []
1467 for p in pat.split(b'/'):
1464 for p in pat.split(b'/'):
1468 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1465 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1469 break
1466 break
1470 root.append(p)
1467 root.append(p)
1471 r.append(b'/'.join(root))
1468 r.append(b'/'.join(root))
1472 elif kind in (b'relpath', b'path'):
1469 elif kind in (b'relpath', b'path'):
1473 if pat == b'.':
1470 if pat == b'.':
1474 pat = b''
1471 pat = b''
1475 r.append(pat)
1472 r.append(pat)
1476 elif kind in (b'rootfilesin',):
1473 elif kind in (b'rootfilesin',):
1477 if pat == b'.':
1474 if pat == b'.':
1478 pat = b''
1475 pat = b''
1479 d.append(pat)
1476 d.append(pat)
1480 else: # relglob, re, relre
1477 else: # relglob, re, relre
1481 r.append(b'')
1478 r.append(b'')
1482 return r, d
1479 return r, d
1483
1480
1484
1481
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    # only the first element (the roots) of the pair is of interest here
    return _patternrootsanddirs(kindpats)[0]
1489
1486
1490
1487
def _rootsdirsandparents(kindpats):
    """Returns roots and exact directories from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    """
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = set(pathutil.dirs(exactdirs))
    parents.update(pathutil.dirs(roots))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, exactdirs, parents
1533
1530
1534
1531
def _explicitfiles(kindpats):
    """Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    """
    # 'rootfilesin' can only name directories, never files, so those
    # patterns are left out before the roots are computed.
    filable = [kp for kp in kindpats if kp[0] != b'rootfilesin']
    return _roots(filable)
1547
1544
1548
1545
1549 def _prefix(kindpats):
1546 def _prefix(kindpats):
1550 '''Whether all the patterns match a prefix (i.e. recursively)'''
1547 '''Whether all the patterns match a prefix (i.e. recursively)'''
1551 for kind, pat, source in kindpats:
1548 for kind, pat, source in kindpats:
1552 if kind not in (b'path', b'relpath'):
1549 if kind not in (b'path', b'relpath'):
1553 return False
1550 return False
1554 return True
1551 return True
1555
1552
1556
1553
# Compiled comment-stripping regex; filled in by readpatternfile() the first
# time it reads a line containing b'#' and reused afterwards.
_commentre = None
1558
1555
1559
1556
def readpatternfile(filepath, warn, sourceinfo=False):
    """parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    ``warn``, when not false, is called with a message for every
    ``syntax:`` line naming an unknown syntax; such lines are skipped.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, line), where ``line`` is the text that is left once
    comments, trailing white space and any syntax prefix have been removed.
    This is useful to debug ignore patterns.
    """
    global _commentre

    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    # The context manager closes the file even when reading, parsing or the
    # warn callback raises.
    with open(filepath, b'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if b"#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # A per-line prefix overrides the current default syntax.
            # NOTE(review): the b'include' / b'subinclude' values carry no
            # colon, so any line merely starting with those words takes the
            # first branch -- confirm this is intended.
            linesyntax = syntax
            for s, rels in pycompat.iteritems(syntaxes):
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now