##// END OF EJS Templates
match: remove explicitdir attribute...
Martin von Zweigbergk -
r44114:5e1b0470 default
parent child Browse files
Show More
@@ -1,1625 +1,1614 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import copy
10 import copy
11 import itertools
11 import itertools
12 import os
12 import os
13 import re
13 import re
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import open
16 from .pycompat import open
17 from . import (
17 from . import (
18 encoding,
18 encoding,
19 error,
19 error,
20 pathutil,
20 pathutil,
21 policy,
21 policy,
22 pycompat,
22 pycompat,
23 util,
23 util,
24 )
24 )
25 from .utils import stringutil
25 from .utils import stringutil
26
26
27 rustmod = policy.importrust('filepatterns')
27 rustmod = policy.importrust('filepatterns')
28
28
# Every pattern kind that may be written as a 'kind:' prefix on a pattern.
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)
# Kinds whose pattern is resolved against the cwd (via pathutil.canonpath()
# in _donormalize()) rather than being taken as given.
cwdrelativepatternkinds = (b'relpath', b'glob')
45
45
46 propertycache = util.propertycache
46 propertycache = util.propertycache
47
47
48
48
def _rematcher(regex):
    '''compile the regexp with the best available regexp engine and return a
    matcher function'''
    m = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        # the compiled object has no test_match; use the regular match method
        return m.match
58
58
59
59
60 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
60 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
61 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
61 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
62 matchers = []
62 matchers = []
63 other = []
63 other = []
64
64
65 for kind, pat, source in kindpats:
65 for kind, pat, source in kindpats:
66 if kind == b'set':
66 if kind == b'set':
67 if ctx is None:
67 if ctx is None:
68 raise error.ProgrammingError(
68 raise error.ProgrammingError(
69 b"fileset expression with no context"
69 b"fileset expression with no context"
70 )
70 )
71 matchers.append(ctx.matchfileset(pat, badfn=badfn))
71 matchers.append(ctx.matchfileset(pat, badfn=badfn))
72
72
73 if listsubrepos:
73 if listsubrepos:
74 for subpath in ctx.substate:
74 for subpath in ctx.substate:
75 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
75 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
76 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
76 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
77 matchers.append(pm)
77 matchers.append(pm)
78
78
79 continue
79 continue
80 other.append((kind, pat, source))
80 other.append((kind, pat, source))
81 return matchers, other
81 return matchers, other
82
82
83
83
84 def _expandsubinclude(kindpats, root):
84 def _expandsubinclude(kindpats, root):
85 '''Returns the list of subinclude matcher args and the kindpats without the
85 '''Returns the list of subinclude matcher args and the kindpats without the
86 subincludes in it.'''
86 subincludes in it.'''
87 relmatchers = []
87 relmatchers = []
88 other = []
88 other = []
89
89
90 for kind, pat, source in kindpats:
90 for kind, pat, source in kindpats:
91 if kind == b'subinclude':
91 if kind == b'subinclude':
92 sourceroot = pathutil.dirname(util.normpath(source))
92 sourceroot = pathutil.dirname(util.normpath(source))
93 pat = util.pconvert(pat)
93 pat = util.pconvert(pat)
94 path = pathutil.join(sourceroot, pat)
94 path = pathutil.join(sourceroot, pat)
95
95
96 newroot = pathutil.dirname(path)
96 newroot = pathutil.dirname(path)
97 matcherargs = (newroot, b'', [], [b'include:%s' % path])
97 matcherargs = (newroot, b'', [], [b'include:%s' % path])
98
98
99 prefix = pathutil.canonpath(root, root, newroot)
99 prefix = pathutil.canonpath(root, root, newroot)
100 if prefix:
100 if prefix:
101 prefix += b'/'
101 prefix += b'/'
102 relmatchers.append((prefix, matcherargs))
102 relmatchers.append((prefix, matcherargs))
103 else:
103 else:
104 other.append((kind, pat, source))
104 other.append((kind, pat, source))
105
105
106 return relmatchers, other
106 return relmatchers, other
107
107
108
108
109 def _kindpatsalwaysmatch(kindpats):
109 def _kindpatsalwaysmatch(kindpats):
110 """"Checks whether the kindspats match everything, as e.g.
110 """"Checks whether the kindspats match everything, as e.g.
111 'relpath:.' does.
111 'relpath:.' does.
112 """
112 """
113 for kind, pat, source in kindpats:
113 for kind, pat, source in kindpats:
114 if pat != b'' or kind not in [b'relpath', b'glob']:
114 if pat != b'' or kind not in [b'relpath', b'glob']:
115 return False
115 return False
116 return True
116 return True
117
117
118
118
def _buildkindpatsmatcher(
    matchercls, root, kindpats, ctx=None, listsubrepos=False, badfn=None
):
    '''Build a single matcher for kindpats, expanding 'set' patterns first.

    The non-'set' patterns, if any, are handed to ``matchercls(root,
    kindpats, badfn=badfn)``; the fileset matchers from _expandsets() are
    added after it. Returns a nevermatcher when nothing was built, the lone
    matcher when there is exactly one, and a unionmatcher otherwise.
    '''
    matchers = []
    fms, kindpats = _expandsets(
        kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn
    )
    if kindpats:
        m = matchercls(root, kindpats, badfn=badfn)
        matchers.append(m)
    if fms:
        matchers.extend(fms)
    if not matchers:
        return nevermatcher(badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)
136
136
137
137
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Usually a patternmatcher is returned:
    >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> match(b'foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> match(b'foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    normalize = _donormalize
    if icasefs:
        # icasefs needs a ctx: the dirstate supplies the on-disk case
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in (b're', b'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)

    if include:
        kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(
            includematcher,
            root,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(
            includematcher,
            root,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = differencematcher(m, em)
    return m
292
292
293
293
def exact(files, badfn=None):
    '''Return an exactmatcher built from the given files.'''
    return exactmatcher(files, badfn=badfn)
296
296
297
297
def always(badfn=None):
    '''Return an alwaysmatcher, i.e. a matcher that matches everything.'''
    return alwaysmatcher(badfn)
300
300
301
301
def never(badfn=None):
    '''Return a nevermatcher, i.e. a matcher that matches nothing.'''
    return nevermatcher(badfn)
304
304
305
305
def badmatch(match, badfn):
    """Make a copy of the given matcher, replacing its bad method with the given
    one.

    The copy is shallow (copy.copy); the matcher passed in is left untouched.
    """
    m = copy.copy(match)
    m.bad = badfn
    return m
313
313
314
314
315 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
315 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
316 '''Convert 'kind:pat' from the patterns list to tuples with kind and
316 '''Convert 'kind:pat' from the patterns list to tuples with kind and
317 normalized and rooted patterns and with listfiles expanded.'''
317 normalized and rooted patterns and with listfiles expanded.'''
318 kindpats = []
318 kindpats = []
319 for kind, pat in [_patsplit(p, default) for p in patterns]:
319 for kind, pat in [_patsplit(p, default) for p in patterns]:
320 if kind in cwdrelativepatternkinds:
320 if kind in cwdrelativepatternkinds:
321 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
321 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
322 elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
322 elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
323 pat = util.normpath(pat)
323 pat = util.normpath(pat)
324 elif kind in (b'listfile', b'listfile0'):
324 elif kind in (b'listfile', b'listfile0'):
325 try:
325 try:
326 files = util.readfile(pat)
326 files = util.readfile(pat)
327 if kind == b'listfile0':
327 if kind == b'listfile0':
328 files = files.split(b'\0')
328 files = files.split(b'\0')
329 else:
329 else:
330 files = files.splitlines()
330 files = files.splitlines()
331 files = [f for f in files if f]
331 files = [f for f in files if f]
332 except EnvironmentError:
332 except EnvironmentError:
333 raise error.Abort(_(b"unable to read file list (%s)") % pat)
333 raise error.Abort(_(b"unable to read file list (%s)") % pat)
334 for k, p, source in _donormalize(
334 for k, p, source in _donormalize(
335 files, default, root, cwd, auditor, warn
335 files, default, root, cwd, auditor, warn
336 ):
336 ):
337 kindpats.append((k, p, pat))
337 kindpats.append((k, p, pat))
338 continue
338 continue
339 elif kind == b'include':
339 elif kind == b'include':
340 try:
340 try:
341 fullpath = os.path.join(root, util.localpath(pat))
341 fullpath = os.path.join(root, util.localpath(pat))
342 includepats = readpatternfile(fullpath, warn)
342 includepats = readpatternfile(fullpath, warn)
343 for k, p, source in _donormalize(
343 for k, p, source in _donormalize(
344 includepats, default, root, cwd, auditor, warn
344 includepats, default, root, cwd, auditor, warn
345 ):
345 ):
346 kindpats.append((k, p, source or pat))
346 kindpats.append((k, p, source or pat))
347 except error.Abort as inst:
347 except error.Abort as inst:
348 raise error.Abort(
348 raise error.Abort(
349 b'%s: %s'
349 b'%s: %s'
350 % (pat, inst[0]) # pytype: disable=unsupported-operands
350 % (pat, inst[0]) # pytype: disable=unsupported-operands
351 )
351 )
352 except IOError as inst:
352 except IOError as inst:
353 if warn:
353 if warn:
354 warn(
354 warn(
355 _(b"skipping unreadable pattern file '%s': %s\n")
355 _(b"skipping unreadable pattern file '%s': %s\n")
356 % (pat, stringutil.forcebytestr(inst.strerror))
356 % (pat, stringutil.forcebytestr(inst.strerror))
357 )
357 )
358 continue
358 continue
359 # else: re or relre - which cannot be normalized
359 # else: re or relre - which cannot be normalized
360 kindpats.append((kind, pat, b''))
360 kindpats.append((kind, pat, b''))
361 return kindpats
361 return kindpats
362
362
363
363
class basematcher(object):
    '''Base class for matchers.

    On its own it matches nothing: matchfn() returns False, files() is
    empty, and always(), isexact() and prefix() all return False.
    Subclasses override the pieces they need.
    '''

    def __init__(self, badfn=None):
        # only shadow the bad() method when a replacement was supplied
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

         visitdir | visitchildrenset
        ----------+-------------------
         False    | set()
         'all'    | 'all'
         True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return b'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not self.always() and not self.isexact() and not self.prefix()
482
478
483
479
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    # every directory, and everything below it, is worth visiting
    def visitdir(self, dir):
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
504
500
505
501
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    # nothing can match, so no directory needs to be visited
    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
531
527
532
528
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function.

    ``predfn`` is installed as this matcher's ``matchfn``. ``predrepr`` is an
    optional description handed to ``stringutil.buildrepr`` when the repr is
    built.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self._predrepr = predrepr
        self.matchfn = predfn

    @encoding.strmethod
    def __repr__(self):
        # Prefer the caller-supplied description; fall back to the byte repr
        # of the predicate itself when buildrepr() yields something falsy.
        desc = stringutil.buildrepr(self._predrepr)
        if not desc:
            desc = pycompat.byterepr(self.matchfn)
        # NOTE(review): "predicatenmatcher" is the existing spelling of this
        # runtime string; it is kept as-is since output may be compared
        # against it elsewhere.
        return b'<predicatenmatcher pred=%s>' % desc
547
543
548
544
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    # The docstring above is a raw string because it contains the regex
    # escape "\."; in a non-raw literal that is an invalid escape sequence.

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        # b'$' is passed to _buildmatch as the pattern suffix.
        self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        """Tell whether ``dir`` is worth visiting.

        Returns ``b'all'`` when this is a prefix matcher and ``dir`` is one
        of its explicit files, otherwise a boolean.
        """
        fileset = self._fileset
        if dir in fileset:
            return b'all' if self._prefix else True
        if dir in self._dirs:
            return True
        # Also visit anything located below one of the explicit files.
        return any(parent in fileset for parent in pathutil.finddirs(dir))

    def visitchildrenset(self, dir):
        """Translate the visitdir() answer into visitchildrenset() terms.

        ``True`` becomes ``b'this'``, a falsy answer becomes an empty set and
        ``b'all'`` is passed through.
        """
        visit = self.visitdir(dir)
        if not visit:
            return set()
        if visit is True:
            return b'this'
        assert visit == b'all'
        return b'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
620
616
621
617
622 # This is basically a reimplementation of pathutil.dirs that stores the
618 # This is basically a reimplementation of pathutil.dirs that stores the
623 # children instead of just a count of them, plus a small optional optimization
619 # children instead of just a count of them, plus a small optional optimization
624 # to avoid some directories we don't need.
620 # to avoid some directories we don't need.
625 class _dirchildren(object):
621 class _dirchildren(object):
626 def __init__(self, paths, onlyinclude=None):
622 def __init__(self, paths, onlyinclude=None):
627 self._dirs = {}
623 self._dirs = {}
628 self._onlyinclude = onlyinclude or []
624 self._onlyinclude = onlyinclude or []
629 addpath = self.addpath
625 addpath = self.addpath
630 for f in paths:
626 for f in paths:
631 addpath(f)
627 addpath(f)
632
628
633 def addpath(self, path):
629 def addpath(self, path):
634 if path == b'':
630 if path == b'':
635 return
631 return
636 dirs = self._dirs
632 dirs = self._dirs
637 findsplitdirs = _dirchildren._findsplitdirs
633 findsplitdirs = _dirchildren._findsplitdirs
638 for d, b in findsplitdirs(path):
634 for d, b in findsplitdirs(path):
639 if d not in self._onlyinclude:
635 if d not in self._onlyinclude:
640 continue
636 continue
641 dirs.setdefault(d, set()).add(b)
637 dirs.setdefault(d, set()).add(b)
642
638
643 @staticmethod
639 @staticmethod
644 def _findsplitdirs(path):
640 def _findsplitdirs(path):
645 # yields (dirname, basename) tuples, walking back to the root. This is
641 # yields (dirname, basename) tuples, walking back to the root. This is
646 # very similar to pathutil.finddirs, except:
642 # very similar to pathutil.finddirs, except:
647 # - produces a (dirname, basename) tuple, not just 'dirname'
643 # - produces a (dirname, basename) tuple, not just 'dirname'
648 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
644 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
649 # slash.
645 # slash.
650 oldpos = len(path)
646 oldpos = len(path)
651 pos = path.rfind(b'/')
647 pos = path.rfind(b'/')
652 while pos != -1:
648 while pos != -1:
653 yield path[:pos], path[pos + 1 : oldpos]
649 yield path[:pos], path[pos + 1 : oldpos]
654 oldpos = pos
650 oldpos = pos
655 pos = path.rfind(b'/', 0, pos)
651 pos = path.rfind(b'/', 0, pos)
656 yield b'', path[:oldpos]
652 yield b'', path[:oldpos]
657
653
658 def get(self, path):
654 def get(self, path):
659 return self._dirs.get(path, set())
655 return self._dirs.get(path, set())
660
656
661
657
class includematcher(basematcher):
    """Matcher built from include patterns.

    The pattern match function is built with the suffix ``(?:/|$)``, so a
    pattern may be followed by either a slash or the end of the name.
    """

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        """Return ``b'all'`` or a boolean telling whether to visit ``dir``."""
        roots = self._roots
        if dir in roots:
            return b'all' if self._prefix else True
        if dir in self._dirs or dir in self._parents:
            return True
        # Anything located below a recursively included root is visited too.
        return any(parent in roots for parent in pathutil.finddirs(dir))

    @propertycache
    def _allparentschildren(self):
        # Feeding dirs, roots and parents in and then keeping only the
        # entries for parents may look odd. It is needed for a case like:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # When asked for the children of 'foo', we could not answer ['bar']
        # if only self._parents had been added.
        return _dirchildren(
            itertools.chain(self._dirs, self._roots, self._parents),
            onlyinclude=self._parents,
        )

    def visitchildrenset(self, dir):
        """Return ``b'all'``, ``b'this'`` or the set of children to visit."""
        roots = self._roots
        if dir in roots:
            return b'all' if self._prefix else b'this'
        # Note: the 'dir in self._parents' case from visitdir is *not* part
        # of this test, it is handled separately below.
        if (
            b'' in roots
            or dir in self._dirs
            or any(parent in roots for parent in pathutil.finddirs(dir))
        ):
            return b'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
724
720
725
721
class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # A list is stored as given; any other iterable is copied into one.
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        """Return the names directly below ``dir`` that need visiting."""
        if not self._fileset or dir not in self._dirs:
            return set()

        # '-' binds tighter than '|': the root entry is dropped from _dirs
        # only, not from the file set.
        candidates = self._fileset | (self._dirs - {b''})
        if dir != b'':
            # Keep only what lives below dir, expressed relative to it.
            prefix = dir + b'/'
            skip = len(prefix)
            candidates = {c[skip:] for c in candidates if c.startswith(prefix)}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating that it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        children = {c for c in candidates if b'/' not in c}
        # We really do not expect the result to be empty, since that would
        # imply that there's something in _dirs that didn't have a file in
        # _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
789
785
790
786
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    '''

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        # The non-matching attributes are taken from the first matcher only.
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        """Return ``b'all'`` or a boolean telling whether to visit ``dir``."""
        # Ask m2 once and reuse the answer for both tests below.
        visit2 = self._m2.visitdir(dir)
        if visit2 == b'all':
            # m2 matches everything below dir, so nothing is left for us.
            return False
        if not visit2:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        # m2 may match something below dir, so m1's 'all' must be reduced to
        # a plain boolean.
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        """Return ``b'all'``, ``b'this'`` or the set of children to visit."""
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
860
854
861
855
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.

    When either argument is None the result is ``m1 or m2``. When one of the
    matchers always matches, a shallow copy of the other one is returned
    instead of building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2
    if m1.always():
        # m2 alone decides what matches, but the non-matching attributes
        # still have to come from m1.
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        combined = copy.copy(m2)
        combined.bad = m1.bad
        combined.traversedir = m1.traversedir
        return combined
    if m2.always():
        return copy.copy(m1)
    return intersectionmatcher(m1, m2)
882
874
883
875
class intersectionmatcher(basematcher):
    """Matches what both ``m1`` and ``m2`` match.

    The non-matching attributes (bad, traversedir) are taken from ``m1``.
    """

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()
        # At least one side is exact: list its files and filter them through
        # the other side.
        exact, other = self._m1, self._m2
        if not exact.isexact():
            exact, other = other, exact
        return [f for f in exact.files() if other(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        """Return ``b'all'`` or a boolean telling whether to visit ``dir``."""
        first = self._m1.visitdir(dir)
        if first == b'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(first and self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        """Return ``b'all'``, ``b'this'`` or the set of children to visit."""
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side leaves the decision entirely to the other side.
        if m1_set == b'all':
            return m2_set
        if m2_set == b'all':
            return m1_set

        if m1_set == b'this' or m2_set == b'this':
            return b'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set.intersection(m2_set)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
944
935
945
936
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep the wrapped matcher's files that live below path, expressed
        # relative to path.
        below = path + b"/"
        skip = len(below)
        self._files = [f[skip:] for f in matcher._files if f.startswith(below)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        # Report the failure to the wrapped matcher under the full path.
        self._matcher.bad(self._path + b"/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + b"/" + f)

    def visitdir(self, dir):
        # b'' stands for the subdirectory itself.
        full = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitdir(full)

    def visitchildrenset(self, dir):
        # b'' stands for the subdirectory itself.
        full = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitchildrenset(full)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return b'<subdirmatcher path=%r, matcher=%r>' % (
            self._path,
            self._matcher,
        )
1016 )
1026
1017
1027
1018
1028 class prefixdirmatcher(basematcher):
1019 class prefixdirmatcher(basematcher):
1029 """Adapt a matcher to work on a parent directory.
1020 """Adapt a matcher to work on a parent directory.
1030
1021
1031 The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
1022 The matcher's non-matching-attributes (bad, traversedir) are ignored.
1032 ignored.
1033
1023
1034 The prefix path should usually be the relative path from the root of
1024 The prefix path should usually be the relative path from the root of
1035 this matcher to the root of the wrapped matcher.
1025 this matcher to the root of the wrapped matcher.
1036
1026
1037 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
1027 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
1038 >>> m2 = prefixdirmatcher(b'd/e', m1)
1028 >>> m2 = prefixdirmatcher(b'd/e', m1)
1039 >>> m2(b'a.txt')
1029 >>> m2(b'a.txt')
1040 False
1030 False
1041 >>> m2(b'd/e/a.txt')
1031 >>> m2(b'd/e/a.txt')
1042 True
1032 True
1043 >>> m2(b'd/e/b.txt')
1033 >>> m2(b'd/e/b.txt')
1044 False
1034 False
1045 >>> m2.files()
1035 >>> m2.files()
1046 ['d/e/a.txt', 'd/e/f/b.txt']
1036 ['d/e/a.txt', 'd/e/f/b.txt']
1047 >>> m2.exact(b'd/e/a.txt')
1037 >>> m2.exact(b'd/e/a.txt')
1048 True
1038 True
1049 >>> m2.visitdir(b'd')
1039 >>> m2.visitdir(b'd')
1050 True
1040 True
1051 >>> m2.visitdir(b'd/e')
1041 >>> m2.visitdir(b'd/e')
1052 True
1042 True
1053 >>> m2.visitdir(b'd/e/f')
1043 >>> m2.visitdir(b'd/e/f')
1054 True
1044 True
1055 >>> m2.visitdir(b'd/e/g')
1045 >>> m2.visitdir(b'd/e/g')
1056 False
1046 False
1057 >>> m2.visitdir(b'd/ef')
1047 >>> m2.visitdir(b'd/ef')
1058 False
1048 False
1059 """
1049 """
1060
1050
1061 def __init__(self, path, matcher, badfn=None):
1051 def __init__(self, path, matcher, badfn=None):
1062 super(prefixdirmatcher, self).__init__(badfn)
1052 super(prefixdirmatcher, self).__init__(badfn)
1063 if not path:
1053 if not path:
1064 raise error.ProgrammingError(b'prefix path must not be empty')
1054 raise error.ProgrammingError(b'prefix path must not be empty')
1065 self._path = path
1055 self._path = path
1066 self._pathprefix = path + b'/'
1056 self._pathprefix = path + b'/'
1067 self._matcher = matcher
1057 self._matcher = matcher
1068
1058
1069 @propertycache
1059 @propertycache
1070 def _files(self):
1060 def _files(self):
1071 return [self._pathprefix + f for f in self._matcher._files]
1061 return [self._pathprefix + f for f in self._matcher._files]
1072
1062
def matchfn(self, f):
    """Return whether ``f`` is below the prefix and matched by the wrapped matcher.

    Files outside the prefix directory never match; for the others the
    prefix is stripped before delegating to the wrapped matcher.
    """
    if not f.startswith(self._pathprefix):
        return False
    return self._matcher.matchfn(f[len(self._pathprefix) :])
1077
1067
@propertycache
def _pathdirs(self):
    """Return the set produced by ``pathutil.finddirs`` for the prefix path."""
    return set(pathutil.finddirs(self._path))
1081
1071
def visitdir(self, dir):
    """Tell whether directory ``dir`` needs to be visited.

    The prefix directory itself maps to the wrapped matcher's root (b''),
    directories below it are delegated with the prefix stripped, and any
    other directory is visited only if it is in ``self._pathdirs``.
    """
    if dir == self._path:
        return self._matcher.visitdir(b'')
    if dir.startswith(self._pathprefix):
        return self._matcher.visitdir(dir[len(self._pathprefix) :])
    return dir in self._pathdirs
1088
1078
def visitchildrenset(self, dir):
    """Return which children of ``dir`` need to be visited.

    For the prefix directory and anything below it the answer comes from
    the wrapped matcher (with the prefix stripped). Directories found in
    ``self._pathdirs`` yield b'this'; everything else yields an empty set.
    """
    if dir == self._path:
        return self._matcher.visitchildrenset(b'')
    if dir.startswith(self._pathprefix):
        return self._matcher.visitchildrenset(dir[len(self._pathprefix) :])
    if dir in self._pathdirs:
        return b'this'
    return set()
1097
1087
def isexact(self):
    """Return the wrapped matcher's ``isexact()`` result."""
    return self._matcher.isexact()
1100
1090
def prefix(self):
    """Return the wrapped matcher's ``prefix()`` result."""
    return self._matcher.prefix()
1103
1093
@encoding.strmethod
def __repr__(self):
    """Return a bytes description naming the prefix path and wrapped matcher."""
    return b'<prefixdirmatcher path=%r, matcher=%r>' % (
        pycompat.bytestr(self._path),
        self._matcher,
    )
1110
1100
1111
1101
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (bad, traversedir) are taken from the first
    matcher.
    """

    def __init__(self, matchers):
        """Build the union of ``matchers`` (a non-empty sequence)."""
        # NOTE(review): only traversedir is visibly copied from the first
        # matcher here; confirm how `bad` is meant to be inherited.
        m1 = matchers[0]
        super(unionmatcher, self).__init__()
        self.traversedir = m1.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        """Return True as soon as any of the matchers accepts ``f``."""
        for match in self._matchers:
            if match(f):
                return True
        return False

    def visitdir(self, dir):
        """Combine the visitdir() answers of all matchers for ``dir``.

        b'all' from any matcher wins immediately; otherwise the individual
        answers are or-ed together.
        """
        r = False
        for m in self._matchers:
            v = m.visitdir(dir)
            if v == b'all':
                return v
            r |= v
        return r

    def visitchildrenset(self, dir):
        """Combine the visitchildrenset() answers of all matchers for ``dir``.

        b'all' from any matcher wins immediately, b'this' from any matcher
        wins over explicit sets, and explicit sets are unioned.
        """
        r = set()
        this = False
        for m in self._matchers:
            v = m.visitchildrenset(dir)
            if not v:
                continue
            if v == b'all':
                return v
            if this or v == b'this':
                this = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(v, set)
            r = r.union(v)
        if this:
            return b'this'
        return r

    @encoding.strmethod
    def __repr__(self):
        return b'<unionmatcher matchers=%r>' % self._matchers
1163
1152
1164
1153
def patkind(pattern, default=None):
    r'''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise ``default`` is returned.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    return _patsplit(pattern, default)[0]
1179
1168
1180
1169
def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns ``(kind, pat)`` when the text before the first ':' is one of
    ``allpatternkinds``; otherwise returns ``(default, pattern)`` with the
    pattern left untouched.
    """
    if b':' in pattern:
        kind, pat = pattern.split(b':', 1)
        if kind in allpatternkinds:
            return kind, pat
    return default, pattern
1189
1178
1190
1179
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    i, n = 0, len(pat)
    res = b''
    # Nesting depth of currently open '{' groups.
    group = 0
    escape = util.stringutil.regexbytesescapemap.get

    def peek():
        # Next unread byte, or False when the pattern is exhausted.
        return i < n and pat[i : i + 1]

    while i < n:
        c = pat[i : i + 1]
        i += 1
        if c not in b'*?[{},\\':
            res += escape(c, c)
        elif c == b'*':
            if peek() == b'*':
                i += 1
                if peek() == b'/':
                    # '**/' also matches zero directories
                    i += 1
                    res += b'(?:.*/)?'
                else:
                    res += b'.*'
            else:
                res += b'[^/]*'
        elif c == b'?':
            res += b'.'
        elif c == b'[':
            # Look for the closing ']'; a leading '!' or ']' belongs to the
            # character class itself.
            j = i
            if j < n and pat[j : j + 1] in b'!]':
                j += 1
            while j < n and pat[j : j + 1] != b']':
                j += 1
            if j >= n:
                # Unterminated class: treat '[' as a literal.
                res += b'\\['
            else:
                stuff = pat[i:j].replace(b'\\', b'\\\\')
                i = j + 1
                if stuff[0:1] == b'!':
                    stuff = b'^' + stuff[1:]
                elif stuff[0:1] == b'^':
                    stuff = b'\\' + stuff
                res = b'%s[%s]' % (res, stuff)
        elif c == b'{':
            group += 1
            res += b'(?:'
        elif c == b'}' and group:
            res += b')'
            group -= 1
        elif c == b',' and group:
            res += b'|'
        elif c == b'\\':
            p = peek()
            if p:
                i += 1
                res += escape(p, p)
            else:
                res += escape(c, c)
        else:
            # '}' or ',' outside of any group: plain characters
            res += escape(c, c)
    return res
1273
1262
1274
1263
def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a
    regular expression.
    globsuffix is appended to the regexp of globs.

    Raises error.ProgrammingError for kinds that cannot be expressed as a
    regular expression.'''

    if rustmod is not None:
        try:
            return rustmod.build_single_regex(kind, pat, globsuffix)
        except rustmod.PatternError:
            raise error.ProgrammingError(
                b'not a regex pattern: %s:%s' % (kind, pat)
            )

    if not pat and kind in (b'glob', b'relpath'):
        return b''
    if kind == b're':
        return pat
    if kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        return util.stringutil.reescape(pat) + b'(?:/|$)'
    if kind == b'rootfilesin':
        if pat == b'.':
            escaped = b''
        else:
            # Pattern is a directory name.
            escaped = util.stringutil.reescape(pat) + b'/'
        # Anything after the pattern must be a non-directory.
        return escaped + b'[^/]+$'
    if kind == b'relglob':
        globre = _globre(pat)
        if globre.startswith(b'[^/]*'):
            # When pat has the form *XYZ (common), make the returned regex more
            # legible by returning the regex for **XYZ instead of **/*XYZ.
            return b'.*' + globre[len(b'[^/]*') :] + globsuffix
        return b'(?:|.*/)' + globre + globsuffix
    if kind == b'relre':
        if pat.startswith(b'^'):
            return pat
        return b'.*' + pat
    if kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1318
1307
1319
1308
def _buildmatch(kindpats, globsuffix, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # Sub-matchers are built lazily, the first time a file below their
        # prefix is tested, and then cached by prefix.
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if f.startswith(prefix):
                    mf = submatchers.get(prefix)
                    if mf is None:
                        mf = match(*matcherargs)
                        submatchers[prefix] = mf

                    if mf(f[len(prefix) :]):
                        return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            # Only 'rootfilesin' patterns: a set lookup on the file's
            # directory replaces the regular expression.
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                i = f.rfind(b'/')
                if i >= 0:
                    dirname = f[:i]
                else:
                    dirname = b'.'
                return dirname in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            matchfuncs.append(mf)
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
            matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    else:
        return regex, lambda f: any(mf(f) for mf in matchfuncs)
1366
1355
1367
1356
# Upper bound, in bytes, used when grouping regular expressions into a single
# compiled pattern; a single pattern longer than this is rejected.
MAX_RE_SIZE = 20000
1369
1358
1370
1359
def _joinregexes(regexps):
    """gather multiple regular expressions into a single one"""
    return b'|'.join(regexps)
1374
1363
1375
1364
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        allgroups = []
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        # Split the patterns into groups whose joined size stays within
        # MAX_RE_SIZE; each group is compiled separately.
        startidx = 0
        groupsize = 0
        for idx, r in enumerate(regexps):
            piecesize = len(r)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            elif (groupsize + piecesize) > MAX_RE_SIZE:
                group = regexps[startidx:idx]
                allgroups.append(_joinregexes(group))
                startidx = idx
                groupsize = 0
            # +1 accounts for the '|' separator added when joining
            groupsize += piecesize + 1

        if startidx == 0:
            # Everything fits in a single regular expression.
            matcher = _rematcher(fullregexp)
            func = lambda s: bool(matcher(s))
        else:
            group = regexps[startidx:]
            allgroups.append(_joinregexes(group))
            allmatchers = [_rematcher(g) for g in allgroups]
            func = lambda s: any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # Recompile the patterns one by one to report the offending one.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(
                        _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                    )
                else:
                    raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_(b"invalid pattern"))
1428
1417
1429
1418
def _patternrootsanddirs(kindpats):
    '''Returns roots and directories corresponding to each pattern.

    This calculates the roots and directories exactly matching the patterns and
    returns a tuple of (roots, dirs) for each. It does not return other
    directories which may also need to be considered, like the parent
    directories.
    '''
    r = []
    d = []
    for kind, pat, source in kindpats:
        if kind in (b'glob', b'rootglob'):  # find the non-glob prefix
            root = []
            for p in pat.split(b'/'):
                if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
                    break
                root.append(p)
            r.append(b'/'.join(root))
        elif kind in (b'relpath', b'path'):
            if pat == b'.':
                pat = b''
            r.append(pat)
        elif kind in (b'rootfilesin',):
            if pat == b'.':
                pat = b''
            d.append(pat)
        else:  # relglob, re, relre
            r.append(b'')
    return r, d
1459
1448
1460
1449
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    roots, dirs = _patternrootsanddirs(kindpats)
    return roots
1465
1454
1466
1455
def _rootsdirsandparents(kindpats):
    '''Returns roots and exact directories from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    '''
    r, d = _patternrootsanddirs(kindpats)

    p = set()
    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    p.update(pathutil.dirs(d))
    p.update(pathutil.dirs(r))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return r, d, p
1509
1498
1510
1499
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    filable = [kp for kp in kindpats if kp[0] not in (b'rootfilesin',)]
    return _roots(filable)
1523
1512
1524
1513
def _prefix(kindpats):
    '''Whether all the patterns match a prefix (i.e. recursively)

    Only the b'path' and b'relpath' kinds qualify; an empty pattern list
    yields True.
    '''
    return all(
        kind in (b'path', b'relpath') for kind, pat, source in kindpats
    )
1531
1520
1532
1521
# Compiled lazily by readpatternfile() the first time a line contains '#'.
_commentre = None
1534
1523
1535
1524
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.

    `warn`, when not false, is called with a message for every
    unrecognized `syntax:` line.
    '''
    global _commentre

    if rustmod is not None:
        result, warnings = rustmod.read_pattern_file(
            filepath, bool(warn), sourceinfo,
        )

        for warning_params in warnings:
            # Can't be easily emitted from Rust, because it would require
            # a mechanism for both gettext and calling the `warn` function.
            warn(_(b"%s: ignoring invalid syntax '%s'\n") % warning_params)

        return result

    # Maps a syntax name to the kind prefix put in front of each pattern.
    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    # The context manager closes the file even if parsing raises.
    with open(filepath, b'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if b"#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # An explicit per-line kind overrides the current default syntax.
            linesyntax = syntax
            for s, rels in pycompat.iteritems(syntaxes):
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now