##// END OF EJS Templates
match: normalize `cwd` early...
Martin von Zweigbergk -
r44402:5685ce2e default
parent child Browse files
Show More
@@ -1,1615 +1,1616 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import copy
10 import copy
11 import itertools
11 import itertools
12 import os
12 import os
13 import re
13 import re
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import open
16 from .pycompat import open
17 from . import (
17 from . import (
18 encoding,
18 encoding,
19 error,
19 error,
20 pathutil,
20 pathutil,
21 policy,
21 policy,
22 pycompat,
22 pycompat,
23 util,
23 util,
24 )
24 )
25 from .utils import stringutil
25 from .utils import stringutil
26
26
# Rust 'filepatterns' module, as resolved by policy.importrust().
rustmod = policy.importrust('filepatterns')

# Names of the pattern kinds, i.e. the 'kind' part of a 'kind:pattern' string.
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)
# Kinds whose pattern is canonicalized against cwd by _donormalize().
cwdrelativepatternkinds = (b'relpath', b'glob')

# Local alias so that classes below can write @propertycache.
propertycache = util.propertycache
47
47
48
48
def _rematcher(regex):
    '''Compile ``regex`` through util.re and return a matching function.

    The compiled object's ``test_match`` attribute is returned when it
    exists (facebook's re2 bindings provide it and it is slightly faster);
    otherwise the regular ``match`` method is returned.
    '''
    compiled = util.re.compile(regex)
    try:
        fastmatch = compiled.test_match
    except AttributeError:
        # Not an re2 object: use the standard match method.
        return compiled.match
    return fastmatch
58
58
59
59
60 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
60 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
61 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
61 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
62 matchers = []
62 matchers = []
63 other = []
63 other = []
64
64
65 for kind, pat, source in kindpats:
65 for kind, pat, source in kindpats:
66 if kind == b'set':
66 if kind == b'set':
67 if ctx is None:
67 if ctx is None:
68 raise error.ProgrammingError(
68 raise error.ProgrammingError(
69 b"fileset expression with no context"
69 b"fileset expression with no context"
70 )
70 )
71 matchers.append(ctx.matchfileset(pat, badfn=badfn))
71 matchers.append(ctx.matchfileset(pat, badfn=badfn))
72
72
73 if listsubrepos:
73 if listsubrepos:
74 for subpath in ctx.substate:
74 for subpath in ctx.substate:
75 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
75 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
76 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
76 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
77 matchers.append(pm)
77 matchers.append(pm)
78
78
79 continue
79 continue
80 other.append((kind, pat, source))
80 other.append((kind, pat, source))
81 return matchers, other
81 return matchers, other
82
82
83
83
84 def _expandsubinclude(kindpats, root):
84 def _expandsubinclude(kindpats, root):
85 '''Returns the list of subinclude matcher args and the kindpats without the
85 '''Returns the list of subinclude matcher args and the kindpats without the
86 subincludes in it.'''
86 subincludes in it.'''
87 relmatchers = []
87 relmatchers = []
88 other = []
88 other = []
89
89
90 for kind, pat, source in kindpats:
90 for kind, pat, source in kindpats:
91 if kind == b'subinclude':
91 if kind == b'subinclude':
92 sourceroot = pathutil.dirname(util.normpath(source))
92 sourceroot = pathutil.dirname(util.normpath(source))
93 pat = util.pconvert(pat)
93 pat = util.pconvert(pat)
94 path = pathutil.join(sourceroot, pat)
94 path = pathutil.join(sourceroot, pat)
95
95
96 newroot = pathutil.dirname(path)
96 newroot = pathutil.dirname(path)
97 matcherargs = (newroot, b'', [], [b'include:%s' % path])
97 matcherargs = (newroot, b'', [], [b'include:%s' % path])
98
98
99 prefix = pathutil.canonpath(root, root, newroot)
99 prefix = pathutil.canonpath(root, root, newroot)
100 if prefix:
100 if prefix:
101 prefix += b'/'
101 prefix += b'/'
102 relmatchers.append((prefix, matcherargs))
102 relmatchers.append((prefix, matcherargs))
103 else:
103 else:
104 other.append((kind, pat, source))
104 other.append((kind, pat, source))
105
105
106 return relmatchers, other
106 return relmatchers, other
107
107
108
108
109 def _kindpatsalwaysmatch(kindpats):
109 def _kindpatsalwaysmatch(kindpats):
110 """"Checks whether the kindspats match everything, as e.g.
110 """"Checks whether the kindspats match everything, as e.g.
111 'relpath:.' does.
111 'relpath:.' does.
112 """
112 """
113 for kind, pat, source in kindpats:
113 for kind, pat, source in kindpats:
114 if pat != b'' or kind not in [b'relpath', b'glob']:
114 if pat != b'' or kind not in [b'relpath', b'glob']:
115 return False
115 return False
116 return True
116 return True
117
117
118
118
def _buildkindpatsmatcher(
    matchercls, root, kindpats, ctx=None, listsubrepos=False, badfn=None
):
    '''Build one matcher for ``kindpats``.

    'set' patterns are expanded to fileset matchers by _expandsets(); the
    remaining patterns, if any, are handed to ``matchercls(root, kindpats,
    badfn=badfn)``. The result is a nevermatcher when nothing was built, the
    sole matcher when exactly one was built, and a unionmatcher over all of
    them otherwise.
    '''
    filesetmatchers, plainkindpats = _expandsets(
        kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn
    )

    combined = []
    if plainkindpats:
        combined.append(matchercls(root, plainkindpats, badfn=badfn))
    combined.extend(filesetmatchers)

    if not combined:
        return nevermatcher(badfn=badfn)
    if len(combined) > 1:
        return unionmatcher(combined)
    return combined[0]
136
136
137
137
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Usually a patternmatcher is returned:
    >>> match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> match(b'/foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> match(b'/foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
       matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
       of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    # Join cwd onto root and normalize it once, before any pattern is
    # processed.
    cwd = util.normpath(os.path.join(root, cwd))

    if icasefs:
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kindpats = []
            for kind, pat, source in _donormalize(
                patterns, default, root, cwd, auditor, warn
            ):
                if kind in (b're', b'relre'):
                    # regex can't be normalized
                    kindpats.append((kind, pat, source))
                    continue

                folded = dsnormalize(pat)

                # Preserve the original to handle a case only rename.
                if pat != folded and pat in dirstate:
                    kindpats.append((kind, pat, source))

                kindpats.append((kind, folded, source))
            return kindpats

    else:
        normalize = _donormalize

    def buildmatcher(matchercls, kindpats, badfn):
        # Shared by the patterns / include / exclude branches below.
        return _buildkindpatsmatcher(
            matchercls,
            root,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=badfn,
        )

    if not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = buildmatcher(patternmatcher, kindpats, badfn)

    # include and exclude always default to 'glob' and get no bad() callback.
    if include:
        kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
        m = intersectmatchers(m, buildmatcher(includematcher, kindpats, None))
    if exclude:
        kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
        m = differencematcher(m, buildmatcher(includematcher, kindpats, None))
    return m
293
294
294
295
def exact(files, badfn=None):
    '''Return an exactmatcher built from ``files`` and ``badfn``.'''
    return exactmatcher(files, badfn=badfn)
297
298
298
299
def always(badfn=None):
    '''Return an alwaysmatcher, i.e. a matcher that matches everything.'''
    return alwaysmatcher(badfn=badfn)
301
302
302
303
def never(badfn=None):
    '''Return a nevermatcher, i.e. a matcher that matches nothing.'''
    return nevermatcher(badfn=badfn)
305
306
306
307
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose bad() callback is ``badfn``.

    The matcher passed in is not modified.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
314
315
315
316
316 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
317 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
317 '''Convert 'kind:pat' from the patterns list to tuples with kind and
318 '''Convert 'kind:pat' from the patterns list to tuples with kind and
318 normalized and rooted patterns and with listfiles expanded.'''
319 normalized and rooted patterns and with listfiles expanded.'''
319 kindpats = []
320 kindpats = []
320 for kind, pat in [_patsplit(p, default) for p in patterns]:
321 for kind, pat in [_patsplit(p, default) for p in patterns]:
321 if kind in cwdrelativepatternkinds:
322 if kind in cwdrelativepatternkinds:
322 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
323 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
323 elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
324 elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
324 pat = util.normpath(pat)
325 pat = util.normpath(pat)
325 elif kind in (b'listfile', b'listfile0'):
326 elif kind in (b'listfile', b'listfile0'):
326 try:
327 try:
327 files = util.readfile(pat)
328 files = util.readfile(pat)
328 if kind == b'listfile0':
329 if kind == b'listfile0':
329 files = files.split(b'\0')
330 files = files.split(b'\0')
330 else:
331 else:
331 files = files.splitlines()
332 files = files.splitlines()
332 files = [f for f in files if f]
333 files = [f for f in files if f]
333 except EnvironmentError:
334 except EnvironmentError:
334 raise error.Abort(_(b"unable to read file list (%s)") % pat)
335 raise error.Abort(_(b"unable to read file list (%s)") % pat)
335 for k, p, source in _donormalize(
336 for k, p, source in _donormalize(
336 files, default, root, cwd, auditor, warn
337 files, default, root, cwd, auditor, warn
337 ):
338 ):
338 kindpats.append((k, p, pat))
339 kindpats.append((k, p, pat))
339 continue
340 continue
340 elif kind == b'include':
341 elif kind == b'include':
341 try:
342 try:
342 fullpath = os.path.join(root, util.localpath(pat))
343 fullpath = os.path.join(root, util.localpath(pat))
343 includepats = readpatternfile(fullpath, warn)
344 includepats = readpatternfile(fullpath, warn)
344 for k, p, source in _donormalize(
345 for k, p, source in _donormalize(
345 includepats, default, root, cwd, auditor, warn
346 includepats, default, root, cwd, auditor, warn
346 ):
347 ):
347 kindpats.append((k, p, source or pat))
348 kindpats.append((k, p, source or pat))
348 except error.Abort as inst:
349 except error.Abort as inst:
349 raise error.Abort(
350 raise error.Abort(
350 b'%s: %s'
351 b'%s: %s'
351 % (pat, inst[0]) # pytype: disable=unsupported-operands
352 % (pat, inst[0]) # pytype: disable=unsupported-operands
352 )
353 )
353 except IOError as inst:
354 except IOError as inst:
354 if warn:
355 if warn:
355 warn(
356 warn(
356 _(b"skipping unreadable pattern file '%s': %s\n")
357 _(b"skipping unreadable pattern file '%s': %s\n")
357 % (pat, stringutil.forcebytestr(inst.strerror))
358 % (pat, stringutil.forcebytestr(inst.strerror))
358 )
359 )
359 continue
360 continue
360 # else: re or relre - which cannot be normalized
361 # else: re or relre - which cannot be normalized
361 kindpats.append((kind, pat, b''))
362 kindpats.append((kind, pat, b''))
362 return kindpats
363 return kindpats
363
364
364
365
class basematcher(object):
    '''Base class of the matchers; by itself it matches no file.'''

    def __init__(self, badfn=None):
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        # Calling a matcher is shorthand for calling its matchfn().
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        # Set view of _files, used for the membership test in exact().
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        # The base class matches nothing; subclasses override this.
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return b'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
479
480
480
481
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        # Every file name matches.
        return True

    def visitdir(self, dir):
        # The directory and all of its subdirectories are to be visited.
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
501
502
502
503
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # No directory is to be visited.
        return False

    def visitchildrenset(self, dir):
        # An empty set is the visitchildrenset() counterpart of False.
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
528
529
529
530
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function"""

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self._predrepr = predrepr
        # The predicate itself serves as the match function.
        self.matchfn = predfn

    @encoding.strmethod
    def __repr__(self):
        # Prefer the caller-supplied description; fall back to the repr of
        # the predicate when that description renders as something falsy.
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        return b'<predicatenmatcher pred=%s>' % described
544
545
545
546
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        # Patterns are anchored at the end of the path (b'$').
        self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        """Return b'all', True or False for whether 'dir' should be visited.

        b'all' is only returned for a directory that is itself listed in the
        explicit files of a prefix matcher.
        """
        explicit = self._fileset
        if dir in explicit:
            return b'all' if self._prefix else True
        if dir in self._dirs:
            return True
        # Otherwise visit only if some ancestor directory is listed.
        for ancestor in pathutil.finddirs(dir):
            if ancestor in explicit:
                return True
        return False

    def visitchildrenset(self, dir):
        """Translate the visitdir() answer into visitchildrenset() terms."""
        visit = self.visitdir(dir)
        if not visit:
            return set()
        if visit is True:
            return b'this'
        assert visit == b'all'
        return b'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
617
618
618
619
# This is basically a reimplementation of pathutil.dirs that stores the
# children instead of just a count of them, plus a small optional optimization
# to avoid some directories we don't need.
class _dirchildren(object):
    """Map each directory to the set of its immediate child names.

    'paths' is an iterable of slash-separated paths. For every path, each
    (directory, child-name) pair on the way up to the root (b'') is recorded.

    'onlyinclude', when given, is a container of directory names; only those
    directories are recorded. When it is None (the default), every directory
    is recorded. An explicitly empty container records nothing.
    """

    def __init__(self, paths, onlyinclude=None):
        self._dirs = {}
        # Keep None distinct from an empty container: None means "no
        # filter", whereas collapsing it to an empty list would reject every
        # directory and leave the mapping permanently empty.
        self._onlyinclude = onlyinclude if onlyinclude else None
        self._filtered = onlyinclude is not None
        addpath = self.addpath
        for f in paths:
            addpath(f)

    def addpath(self, path):
        """Record every (directory, child) pair leading to 'path'.

        The empty path (the root itself) contributes nothing.
        """
        if path == b'':
            return
        if self._filtered and self._onlyinclude is None:
            # An explicitly empty filter admits no directory at all.
            return
        only = self._onlyinclude
        dirs = self._dirs
        for d, b in _dirchildren._findsplitdirs(path):
            if only is not None and d not in only:
                continue
            dirs.setdefault(d, set()).add(b)

    @staticmethod
    def _findsplitdirs(path):
        # yields (dirname, basename) tuples, walking back to the root. This is
        # very similar to pathutil.finddirs, except:
        #  - produces a (dirname, basename) tuple, not just 'dirname'
        # Unlike manifest._splittopdir, this does not suffix `dirname` with a
        # slash.
        oldpos = len(path)
        pos = path.rfind(b'/')
        while pos != -1:
            yield path[:pos], path[pos + 1 : oldpos]
            oldpos = pos
            pos = path.rfind(b'/', 0, pos)
        # The topmost component hangs off the root directory b''.
        yield b'', path[:oldpos]

    def get(self, path):
        """Return the recorded children of 'path' (an empty set if none)."""
        return self._dirs.get(path, set())
657
658
658
659
class includematcher(basematcher):
    """Matcher built from include patterns.

    Unlike the b'$' anchor used by patternmatcher, patterns here are built
    with the suffix b'(?:/|$)'.
    """

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        """Return b'all', True or False for whether 'dir' should be visited."""
        roots = self._roots
        if dir in roots:
            # Only a prefix matcher may promise that everything matches.
            return b'all' if self._prefix else True
        if dir in self._dirs or dir in self._parents:
            return True
        return any(ancestor in roots for ancestor in pathutil.finddirs(dir))

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        """Return b'all', b'this' or the set of children of 'dir' to visit."""
        roots = self._roots
        if self._prefix and dir in roots:
            return b'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if (
            b'' in roots
            or dir in roots
            or dir in self._dirs
            or any(ancestor in roots for ancestor in pathutil.finddirs(dir))
        ):
            return b'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
721
722
722
723
class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # A list is stored as-is; any other iterable is materialized.
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        """Return the immediate children of 'dir' that lead to a listed file."""
        fileset = self._fileset
        if not fileset:
            return set()
        alldirs = self._dirs
        if dir not in alldirs:
            return set()

        # '-' binds tighter than '|': the root entry b'' is removed from the
        # directory set only, then the files are merged in.
        candidates = fileset | (alldirs - {b''})
        if dir != b'':
            prefix = dir + b'/'
            skip = len(prefix)
            candidates = {c[skip:] for c in candidates if c.startswith(prefix)}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
        # subdir will be in there without a slash.
        children = {c for c in candidates if b'/' not in c}
        # We really do not expect this to be empty, since that would imply
        # that there's something in _dirs that didn't have a file in _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
786
787
787
788
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    '''

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        # bad and traversedir are taken from the first matcher only.
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        """Return whether 'f' is matched by m1 and not by m2."""
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        """Return whether 'dir' should be visited (b'all', True or False).

        m2 is asked exactly once per directory. Asking it twice would repeat
        the whole lookup, and that cost compounds when m2 is itself a
        composed matcher.
        """
        m2visit = self._m2.visitdir(dir)
        if m2visit == b'all':
            # m2 matches everything below dir, so nothing can be left over.
            return False
        if not m2visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        # m2 matches part of dir: never promise 'all', only True/False.
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        """Return b'all', b'this' or a set of children of 'dir' to visit."""
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        """The difference is exact whenever the first matcher is."""
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
855
856
856
857
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    '''
    if m1 is None or m2 is None:
        return m1 or m2
    if m1.always():
        # m1 adds no restriction: a copy of m2 does the matching, but it must
        # still report through m1's callbacks.
        clone = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        clone.bad = m1.bad
        clone.traversedir = m1.traversedir
        return clone
    if m2.always():
        # m2 adds no restriction; a copy of m1 already has the right callbacks.
        return copy.copy(m1)
    return intersectionmatcher(m1, m2)
875
876
876
877
class intersectionmatcher(basematcher):
    """Matches what both wrapped matchers match.

    The bad and traversedir attributes are taken from the first matcher.
    """

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        first, second = self._m1, self._m2
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return first.files() + second.files()
        # At least one side is exact: filter its files through the other.
        if first.isexact():
            exact, other = first, second
        else:
            exact, other = second, first
        return [f for f in exact.files() if other(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        """Return whether 'dir' should be visited (b'all', True or False)."""
        first = self._m1.visitdir(dir)
        if not first:
            return False
        second = self._m2.visitdir(dir)
        if first == b'all':
            return second
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(second)

    def visitchildrenset(self, dir):
        """Return b'all', b'this' or a set of children of 'dir' to visit."""
        left = self._m1.visitchildrenset(dir)
        if not left:
            return set()
        right = self._m2.visitchildrenset(dir)
        if not right:
            return set()

        # 'all' on one side defers entirely to the other side.
        if left == b'all':
            return right
        if right == b'all':
            return left

        if left == b'this' or right == b'this':
            return b'this'

        assert isinstance(left, set) and isinstance(right, set)
        return left & right

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
936
937
937
938
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'/root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the files below 'path', with the 'path/' prefix removed.
        below = path + b"/"
        skip = len(below)
        self._files = [f[skip:] for f in matcher._files if f.startswith(below)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        self._matcher.bad(self._path + b"/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + b"/" + f)

    def visitdir(self, dir):
        # b'' is this matcher's root, i.e. the subdirectory itself.
        outer = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitdir(outer)

    def visitchildrenset(self, dir):
        # b'' is this matcher's root, i.e. the subdirectory itself.
        outer = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitchildrenset(outer)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return b'<subdirmatcher path=%r, matcher=%r>' % (
            self._path,
            self._matcher,
        )
1018
1019
1019
1020
1020 class prefixdirmatcher(basematcher):
1021 class prefixdirmatcher(basematcher):
1021 """Adapt a matcher to work on a parent directory.
1022 """Adapt a matcher to work on a parent directory.
1022
1023
1023 The matcher's non-matching-attributes (bad, traversedir) are ignored.
1024 The matcher's non-matching-attributes (bad, traversedir) are ignored.
1024
1025
1025 The prefix path should usually be the relative path from the root of
1026 The prefix path should usually be the relative path from the root of
1026 this matcher to the root of the wrapped matcher.
1027 this matcher to the root of the wrapped matcher.
1027
1028
1028 >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
1029 >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
1029 >>> m2 = prefixdirmatcher(b'd/e', m1)
1030 >>> m2 = prefixdirmatcher(b'd/e', m1)
1030 >>> m2(b'a.txt')
1031 >>> m2(b'a.txt')
1031 False
1032 False
1032 >>> m2(b'd/e/a.txt')
1033 >>> m2(b'd/e/a.txt')
1033 True
1034 True
1034 >>> m2(b'd/e/b.txt')
1035 >>> m2(b'd/e/b.txt')
1035 False
1036 False
1036 >>> m2.files()
1037 >>> m2.files()
1037 ['d/e/a.txt', 'd/e/f/b.txt']
1038 ['d/e/a.txt', 'd/e/f/b.txt']
1038 >>> m2.exact(b'd/e/a.txt')
1039 >>> m2.exact(b'd/e/a.txt')
1039 True
1040 True
1040 >>> m2.visitdir(b'd')
1041 >>> m2.visitdir(b'd')
1041 True
1042 True
1042 >>> m2.visitdir(b'd/e')
1043 >>> m2.visitdir(b'd/e')
1043 True
1044 True
1044 >>> m2.visitdir(b'd/e/f')
1045 >>> m2.visitdir(b'd/e/f')
1045 True
1046 True
1046 >>> m2.visitdir(b'd/e/g')
1047 >>> m2.visitdir(b'd/e/g')
1047 False
1048 False
1048 >>> m2.visitdir(b'd/ef')
1049 >>> m2.visitdir(b'd/ef')
1049 False
1050 False
1050 """
1051 """
1051
1052
1052 def __init__(self, path, matcher, badfn=None):
1053 def __init__(self, path, matcher, badfn=None):
1053 super(prefixdirmatcher, self).__init__(badfn)
1054 super(prefixdirmatcher, self).__init__(badfn)
1054 if not path:
1055 if not path:
1055 raise error.ProgrammingError(b'prefix path must not be empty')
1056 raise error.ProgrammingError(b'prefix path must not be empty')
1056 self._path = path
1057 self._path = path
1057 self._pathprefix = path + b'/'
1058 self._pathprefix = path + b'/'
1058 self._matcher = matcher
1059 self._matcher = matcher
1059
1060
1060 @propertycache
1061 @propertycache
1061 def _files(self):
1062 def _files(self):
1062 return [self._pathprefix + f for f in self._matcher._files]
1063 return [self._pathprefix + f for f in self._matcher._files]
1063
1064
def matchfn(self, f):
    """Match ``f`` against the wrapped matcher, relative to the prefix.

    Names that do not start with the slash-terminated prefix never
    match. Otherwise the prefix is stripped and the remainder is passed
    to the wrapped matcher's ``matchfn``, whose result is returned.
    """
    prefix = self._pathprefix
    if f.startswith(prefix):
        return self._matcher.matchfn(f[len(prefix) :])
    return False
1068
1069
@propertycache
def _pathdirs(self):
    """Set built from ``pathutil.finddirs`` applied to the prefix path.

    NOTE(review): presumably these are the ancestor directories of the
    prefix (visitdir() answers True for them) -- confirm in pathutil.
    """
    found = pathutil.finddirs(self._path)
    return set(found)
1072
1073
def visitdir(self, dir):
    """Decide whether ``dir`` should be visited.

    The prefix directory itself and anything beneath it are delegated
    to the wrapped matcher (with the prefix removed; the prefix
    directory maps to ``b''``). Any other directory is visited only if
    it is a member of ``self._pathdirs``.
    """
    if dir == self._path:
        relative = b''
    elif dir.startswith(self._pathprefix):
        relative = dir[len(self._pathprefix) :]
    else:
        # Outside the prefix: only directories recorded in _pathdirs
        # are worth walking.
        return dir in self._pathdirs
    return self._matcher.visitdir(relative)
1079
1080
def visitchildrenset(self, dir):
    """Return the children of ``dir`` that need visiting.

    The prefix directory and everything beneath it are delegated to the
    wrapped matcher (prefix removed; the prefix directory maps to
    ``b''``). A directory found in ``self._pathdirs`` yields
    ``b'this'``; anything else yields an empty set.
    """
    if dir == self._path:
        relative = b''
    elif dir.startswith(self._pathprefix):
        relative = dir[len(self._pathprefix) :]
    elif dir in self._pathdirs:
        return b'this'
    else:
        return set()
    return self._matcher.visitchildrenset(relative)
1088
1089
def isexact(self):
    """Report whatever the wrapped matcher's ``isexact()`` reports."""
    wrapped = self._matcher
    return wrapped.isexact()
1091
1092
def prefix(self):
    """Report whatever the wrapped matcher's ``prefix()`` reports."""
    wrapped = self._matcher
    return wrapped.prefix()
1094
1095
@encoding.strmethod
def __repr__(self):
    """Debug representation showing the prefix path and wrapped matcher."""
    shownpath = pycompat.bytestr(self._path)
    return b'<prefixdirmatcher path=%r, matcher=%r>' % (
        shownpath,
        self._matcher,
    )
1101
1102
1102
1103
class unionmatcher(basematcher):
    """A matcher that matches whenever any of several matchers does.

    The ``traversedir`` attribute is taken from the first matcher.

    NOTE(review): earlier documentation also listed ``bad`` as coming from
    the first matcher, but nothing in this class copies it -- confirm
    against basematcher.
    """

    def __init__(self, matchers):
        # Index first so that an empty sequence fails before the base
        # class is initialised.
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        """Return True if at least one of the matchers accepts ``f``."""
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        """Combine the ``visitdir`` answers of all matchers.

        ``b'all'`` from any matcher is returned immediately; otherwise
        the individual answers are or-ed together.
        """
        combined = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == b'all':
                return answer
            combined |= answer
        return combined

    def visitchildrenset(self, dir):
        """Combine the ``visitchildrenset`` answers of all matchers.

        ``b'all'`` wins outright, then ``b'this'``; if neither occurs the
        result is the union of the sets the matchers returned.
        """
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == b'all':
                return answer
            if sawthis:
                # Already settled on 'this'; keep scanning only because a
                # later matcher might still say 'all'.
                continue
            if answer == b'this':
                sawthis = True
                continue
            assert isinstance(answer, set)
            children.update(answer)
        if sawthis:
            return b'this'
        return children

    @encoding.strmethod
    def __repr__(self):
        return b'<unionmatcher matchers=%r>' % self._matchers
1153
1154
1154
1155
def patkind(pattern, default=None):
    r'''Return the kind of a 'kind:pat' pattern, or ``default``.

    The kind is only recognised when it is one of the known pattern kinds;
    otherwise ``default`` is returned.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    kind, _pat = _patsplit(pattern, default)
    return kind
1169
1170
1170
1171
def _patsplit(pattern, default):
    """Split ``pattern`` into a ``(kind, pat)`` pair.

    The text before the first colon is treated as the kind only when it is
    listed in ``allpatternkinds``. In every other case the result is
    ``(default, pattern)`` with the pattern left untouched.
    """
    kind, colon, pat = pattern.partition(b':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1179
1180
1180
1181
def _globre(pat):
    r'''Translate an extended glob (bytes) into regular expression source.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    escape = util.stringutil.regexbytesescapemap.get
    pieces = []  # regex fragments, joined once at the end
    depth = 0  # number of '{' groups currently open
    pos, end = 0, len(pat)

    def peek():
        # Next unread byte, or False once the pattern is exhausted.
        return pos < end and pat[pos : pos + 1]

    while pos < end:
        c = pat[pos : pos + 1]
        pos += 1
        if c not in b'*?[{},\\':
            pieces.append(escape(c, c))
        elif c == b'*':
            if peek() != b'*':
                # A single star never crosses a directory separator.
                pieces.append(b'[^/]*')
            else:
                pos += 1
                if peek() == b'/':
                    # '**/' also matches zero directories.
                    pos += 1
                    pieces.append(b'(?:.*/)?')
                else:
                    pieces.append(b'.*')
        elif c == b'?':
            pieces.append(b'.')
        elif c == b'[':
            close = pos
            # A leading '!' or ']' belongs to the class body.
            if close < end and pat[close : close + 1] in b'!]':
                close += 1
            while close < end and pat[close : close + 1] != b']':
                close += 1
            if close >= end:
                # No closing bracket: treat '[' as a literal.
                pieces.append(b'\\[')
            else:
                stuff = pat[pos:close].replace(b'\\', b'\\\\')
                pos = close + 1
                first = stuff[0:1]
                if first == b'!':
                    # Glob negation becomes regex negation.
                    stuff = b'^' + stuff[1:]
                elif first == b'^':
                    # A literal caret must not negate the class.
                    stuff = b'\\' + stuff
                pieces.append(b'[' + stuff + b']')
        elif c == b'{':
            depth += 1
            pieces.append(b'(?:')
        elif c == b'}' and depth:
            pieces.append(b')')
            depth -= 1
        elif c == b',' and depth:
            pieces.append(b'|')
        elif c == b'\\':
            following = peek()
            if following:
                pos += 1
                pieces.append(escape(following, following))
            else:
                # Trailing backslash: emit it escaped.
                pieces.append(escape(c, c))
        else:
            # '}' or ',' outside of any group is an ordinary character.
            pieces.append(escape(c, c))
    return b''.join(pieces)
1263
1264
1264
1265
def _regex(kind, pat, globsuffix):
    '''Build the regular expression for one (normalized) pattern.

    ``globsuffix`` is appended to expressions produced from glob kinds.
    When the Rust module is available the work is delegated to it.
    Kinds that cannot be expressed as a regex raise ProgrammingError.'''

    if rustmod is not None:
        try:
            return rustmod.build_single_regex(kind, pat, globsuffix)
        except rustmod.PatternError:
            raise error.ProgrammingError(
                b'not a regex pattern: %s:%s' % (kind, pat)
            )

    if kind in (b'glob', b'relpath') and not pat:
        return b''
    if kind == b're':
        return pat
    if kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        # Match the path itself or anything underneath it.
        return util.stringutil.reescape(pat) + b'(?:/|$)'
    if kind == b'rootfilesin':
        # '.' contributes no directory prefix; any other pattern is a
        # directory name.
        if pat == b'.':
            dirprefix = b''
        else:
            dirprefix = util.stringutil.reescape(pat) + b'/'
        # Anything after the pattern must be a non-directory.
        return dirprefix + b'[^/]+$'
    if kind == b'relglob':
        translated = _globre(pat)
        anyname = b'[^/]*'
        if translated.startswith(anyname):
            # When pat has the form *XYZ (common), make the returned regex
            # more legible by returning the regex for **XYZ instead of
            # **/*XYZ.
            return b'.*' + translated[len(anyname) :] + globsuffix
        return b'(?:|.*/)' + translated + globsuffix
    if kind == b'relre':
        # An anchored expression is used as is; otherwise it may match
        # anywhere in the name.
        return pat if pat.startswith(b'^') else b'.*' + pat
    if kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1308
1309
1309
1310
def _buildmatch(kindpats, globsuffix, root):
    '''Build a ``(regex, matchfn)`` pair for ``kindpats``.

    ``globsuffix`` is appended to the regexp of globs. Subinclude
    patterns are split off first and handled by lazily created
    sub-matchers; the remaining patterns are matched either by a directory
    lookup (when all of them are ``rootfilesin``) or by a compiled
    regular expression.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                # Sub-matchers are created on first use and then cached.
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = submatchers[prefix] = match(*matcherargs)
                if submatch(f[len(prefix) :]):
                    return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                # A name without a slash lives in the '.' directory.
                head, slash, _tail = f.rpartition(b'/')
                return (head if slash else b'.') in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
1356
1357
1357
1358
# Upper bound, in bytes, used by _buildregexmatch(): a single pattern's
# regex longer than this aborts, and joined groups are split so they stay
# below it.
MAX_RE_SIZE = 20000
1359
1360
1360
1361
def _joinregexes(regexps):
    """Combine several regular expressions into one alternation."""
    alternation = b'|'
    return alternation.join(regexps)
1364
1365
1365
1366
def _buildregexmatch(kindpats, globsuffix):
    """Compile ``kindpats`` into a ``(regexp string, match function)`` pair.

    The per-pattern expressions are joined into one alternation. When the
    joined expression would exceed MAX_RE_SIZE it is compiled as several
    smaller groups instead; the returned string is always the full join.
    A single pattern whose expression exceeds MAX_RE_SIZE aborts, and an
    expression that fails to compile aborts naming the offending pattern.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        allgroups = []
        startidx = 0  # index of the first regexp of the current group
        groupsize = 0  # size of the current group, separators included
        for idx, piece in enumerate(regexps):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            if groupsize + piecesize > MAX_RE_SIZE:
                # Close the current group and start a new one here.
                allgroups.append(_joinregexes(regexps[startidx:idx]))
                startidx = idx
                groupsize = 0
            groupsize += piecesize + 1

        if startidx == 0:
            # Everything fitted into one group: compile the full join.
            matcher = _rematcher(fullregexp)

            def func(s):
                return bool(matcher(s))

        else:
            allgroups.append(_joinregexes(regexps[startidx:]))
            allmatchers = [_rematcher(g) for g in allgroups]

            def func(s):
                return any(m(s) for m in allmatchers)

        return fullregexp, func
    except re.error:
        # Recompile the patterns one by one to find the one to blame.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    msg = _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                else:
                    msg = _(b"invalid pattern (%s): %s") % (k, p)
                raise error.Abort(msg)
        raise error.Abort(_(b"invalid pattern"))
1418
1419
1419
1420
def _patternrootsanddirs(kindpats):
    '''Compute the roots and exact directories named by the patterns.

    Returns a ``(roots, dirs)`` tuple of lists. Other directories that
    may also need to be considered, such as parent directories, are not
    included.
    '''
    roots = []
    exactdirs = []
    globchars = (b'[', b'{', b'*', b'?')
    for kind, pat, _source in kindpats:
        if kind == b'rootfilesin':
            exactdirs.append(b'' if pat == b'.' else pat)
        elif kind in (b'relpath', b'path'):
            roots.append(b'' if pat == b'.' else pat)
        elif kind in (b'glob', b'rootglob'):
            # Keep the leading path components free of glob syntax.
            literal = []
            for component in pat.split(b'/'):
                if any(ch in component for ch in globchars):
                    break
                literal.append(component)
            roots.append(b'/'.join(literal))
        else:  # relglob, re, relre
            roots.append(b'')
    return roots, exactdirs
1449
1450
1450
1451
def _roots(kindpats):
    '''Return the root directories to match recursively for the patterns.'''
    return _patternrootsanddirs(kindpats)[0]
1455
1456
1456
1457
def _rootsdirsandparents(kindpats):
    '''Return roots, exact directories and their parents for the patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    The result is a tuple of (roots, dirs, parents); the first two are
    lists and the last one is a set.

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = set()
    for group in (exactdirs, roots):
        parents.update(pathutil.dirs(group))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, exactdirs, parents
1499
1500
1500
1501
def _explicitfiles(kindpats):
    '''Return the filenames the patterns could be naming explicitly.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only name directories, never files, so those
    # patterns are dropped before the roots are computed.
    filable = [kp for kp in kindpats if kp[0] != b'rootfilesin']
    return _roots(filable)
1513
1514
1514
1515
def _prefix(kindpats):
    '''Tell whether every pattern is a prefix (i.e. recursive) match.

    That is the case when each pattern kind is ``path`` or ``relpath``;
    an empty pattern list counts as True.'''
    return all(
        kind in (b'path', b'relpath') for kind, pat, source in kindpats
    )
1521
1522
1522
1523
# Compiled comment-stripping regex; filled in by readpatternfile() the
# first time it meets a line containing '#'.
_commentre = None
1524
1525
1525
1526
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    filepath is the file to read. warn, when set, is called with a
    message for every unknown "syntax:" line; such lines are ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.
    '''
    global _commentre

    if rustmod is not None:
        result, warnings = rustmod.read_pattern_file(
            filepath, bool(warn), sourceinfo,
        )

        for warning_params in warnings:
            # Can't be easily emitted from Rust, because it would require
            # a mechanism for both gettext and calling the `warn` function.
            warn(_(b"%s: ignoring invalid syntax '%s'\n") % warning_params)

        return result

    # Maps the name used in "syntax:" lines (and as a per-line prefix) to
    # the pattern kind prefix put in front of the resulting pattern.
    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    # The context manager closes the file even when parsing (or the warn
    # callback) raises, which the previous explicit close() did not.
    with open(filepath, b'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if b"#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # A line may override the current default syntax by starting
            # with either the kind prefix or the syntax name and a colon.
            linesyntax = syntax
            for s, rels in pycompat.iteritems(syntaxes):
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now