##// END OF EJS Templates
match: convert O(n) to O(log n) in exactmatcher.visitchildrenset...
Kyle Lippincott -
r47634:8bca353b default
parent child Browse files
Show More
@@ -1,1634 +1,1659 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Olivia Mackall <olivia@selenic.com> and others
3 # Copyright 2008, 2009 Olivia Mackall <olivia@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import bisect
10 import copy
11 import copy
11 import itertools
12 import itertools
12 import os
13 import os
13 import re
14 import re
14
15
15 from .i18n import _
16 from .i18n import _
16 from .pycompat import open
17 from .pycompat import open
17 from . import (
18 from . import (
18 encoding,
19 encoding,
19 error,
20 error,
20 pathutil,
21 pathutil,
21 policy,
22 policy,
22 pycompat,
23 pycompat,
23 util,
24 util,
24 )
25 )
25 from .utils import stringutil
26 from .utils import stringutil
26
27
# NOTE(review): presumably the optional Rust implementation of the 'dirstate'
# module as selected by the module policy -- confirm against
# policy.importrust().
rustmod = policy.importrust('dirstate')

# Every pattern kind prefix ("kind:pattern") known to this module.
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)
# Kinds whose patterns _donormalize() canonicalizes relative to cwd.
cwdrelativepatternkinds = (b'relpath', b'glob')

# Short alias so the matcher classes can write ``@propertycache``.
propertycache = util.propertycache
47
48
48
49
def _rematcher(regex):
    """Compile ``regex`` with ``util.re`` and return a matcher function.

    The compiled object's ``test_match`` method is returned when it has one;
    otherwise its ordinary ``match`` method is returned.
    """
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matchfn = compiled.test_match
    except AttributeError:
        matchfn = compiled.match
    return matchfn
58
59
59
60
60 def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
61 def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
61 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
62 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
62 matchers = []
63 matchers = []
63 other = []
64 other = []
64
65
65 for kind, pat, source in kindpats:
66 for kind, pat, source in kindpats:
66 if kind == b'set':
67 if kind == b'set':
67 if ctx is None:
68 if ctx is None:
68 raise error.ProgrammingError(
69 raise error.ProgrammingError(
69 b"fileset expression with no context"
70 b"fileset expression with no context"
70 )
71 )
71 matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))
72 matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))
72
73
73 if listsubrepos:
74 if listsubrepos:
74 for subpath in ctx.substate:
75 for subpath in ctx.substate:
75 sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
76 sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
76 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
77 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
77 matchers.append(pm)
78 matchers.append(pm)
78
79
79 continue
80 continue
80 other.append((kind, pat, source))
81 other.append((kind, pat, source))
81 return matchers, other
82 return matchers, other
82
83
83
84
84 def _expandsubinclude(kindpats, root):
85 def _expandsubinclude(kindpats, root):
85 """Returns the list of subinclude matcher args and the kindpats without the
86 """Returns the list of subinclude matcher args and the kindpats without the
86 subincludes in it."""
87 subincludes in it."""
87 relmatchers = []
88 relmatchers = []
88 other = []
89 other = []
89
90
90 for kind, pat, source in kindpats:
91 for kind, pat, source in kindpats:
91 if kind == b'subinclude':
92 if kind == b'subinclude':
92 sourceroot = pathutil.dirname(util.normpath(source))
93 sourceroot = pathutil.dirname(util.normpath(source))
93 pat = util.pconvert(pat)
94 pat = util.pconvert(pat)
94 path = pathutil.join(sourceroot, pat)
95 path = pathutil.join(sourceroot, pat)
95
96
96 newroot = pathutil.dirname(path)
97 newroot = pathutil.dirname(path)
97 matcherargs = (newroot, b'', [], [b'include:%s' % path])
98 matcherargs = (newroot, b'', [], [b'include:%s' % path])
98
99
99 prefix = pathutil.canonpath(root, root, newroot)
100 prefix = pathutil.canonpath(root, root, newroot)
100 if prefix:
101 if prefix:
101 prefix += b'/'
102 prefix += b'/'
102 relmatchers.append((prefix, matcherargs))
103 relmatchers.append((prefix, matcherargs))
103 else:
104 else:
104 other.append((kind, pat, source))
105 other.append((kind, pat, source))
105
106
106 return relmatchers, other
107 return relmatchers, other
107
108
108
109
109 def _kindpatsalwaysmatch(kindpats):
110 def _kindpatsalwaysmatch(kindpats):
110 """Checks whether the kindspats match everything, as e.g.
111 """Checks whether the kindspats match everything, as e.g.
111 'relpath:.' does.
112 'relpath:.' does.
112 """
113 """
113 for kind, pat, source in kindpats:
114 for kind, pat, source in kindpats:
114 if pat != b'' or kind not in [b'relpath', b'glob']:
115 if pat != b'' or kind not in [b'relpath', b'glob']:
115 return False
116 return False
116 return True
117 return True
117
118
118
119
def _buildkindpatsmatcher(
    matchercls,
    root,
    cwd,
    kindpats,
    ctx=None,
    listsubrepos=False,
    badfn=None,
):
    """Build a single matcher for ``kindpats``.

    Fileset ('set') patterns are expanded through _expandsets(); the
    remaining patterns, if any, are handed to ``matchercls``. The result is
    a nevermatcher when nothing was built, the lone matcher when exactly one
    was built, and a unionmatcher over all of them otherwise.
    """
    filesetmatchers, plainkindpats = _expandsets(
        cwd,
        kindpats,
        ctx=ctx,
        listsubrepos=listsubrepos,
        badfn=badfn,
    )

    # The pattern-based matcher, when present, goes first.
    built = []
    if plainkindpats:
        built.append(matchercls(root, plainkindpats, badfn=badfn))
    built.extend(filesetmatchers)

    if not built:
        return nevermatcher(badfn=badfn)
    if len(built) == 1:
        return built[0]
    return unionmatcher(built)
146
147
147
148
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    >>> def _match(root, *args, **kwargs):
    ...     return match(util.localpath(root), *args, **kwargs)

    Usually a patternmatcher is returned:
    >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> _match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    cwd = os.path.join(root, util.localpath(cwd))

    if icasefs:
        dirstate = ctx.repo().dirstate
        foldcase = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            """_donormalize() plus folding patterns to the dirstate's case."""
            folded = []
            for kind, pat, source in _donormalize(
                patterns, default, root, cwd, auditor, warn
            ):
                if kind in (b're', b'relre'):
                    # regex can't be normalized
                    folded.append((kind, pat, source))
                    continue

                casefolded = foldcase(pat)
                # Preserve the original to handle a case only rename.
                if casefolded != pat and pat in dirstate:
                    folded.append((kind, pat, source))
                folded.append((kind, casefolded, source))
            return folded

    else:
        normalize = _donormalize

    def buildmatcher(matchercls, kindpats, badfn):
        """Build a matcher of ``matchercls`` with this call's shared args."""
        return _buildkindpatsmatcher(
            matchercls,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=badfn,
        )

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            result = alwaysmatcher(badfn)
        else:
            result = buildmatcher(patternmatcher, kindpats, badfn)
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        result = alwaysmatcher(badfn)

    # Include and exclude patterns always default to b'glob', and their
    # matchers are built without the caller's badfn.
    if include:
        kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
        includem = buildmatcher(includematcher, kindpats, None)
        result = intersectmatchers(result, includem)
    if exclude:
        kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
        excludem = buildmatcher(includematcher, kindpats, None)
        result = differencematcher(result, excludem)
    return result
310
311
311
312
def exact(files, badfn=None):
    """Return an exactmatcher built from ``files`` and ``badfn``."""
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
314
315
315
316
def always(badfn=None):
    """Return an alwaysmatcher (matches everything) using ``badfn``."""
    return alwaysmatcher(badfn=badfn)
318
319
319
320
def never(badfn=None):
    """Return a nevermatcher (matches nothing) using ``badfn``."""
    return nevermatcher(badfn=badfn)
322
323
323
324
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose bad() is ``badfn``.

    The given matcher itself is left untouched; only the copy gets the new
    ``bad`` attribute.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
331
332
332
333
333 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
334 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
334 """Convert 'kind:pat' from the patterns list to tuples with kind and
335 """Convert 'kind:pat' from the patterns list to tuples with kind and
335 normalized and rooted patterns and with listfiles expanded."""
336 normalized and rooted patterns and with listfiles expanded."""
336 kindpats = []
337 kindpats = []
337 for kind, pat in [_patsplit(p, default) for p in patterns]:
338 for kind, pat in [_patsplit(p, default) for p in patterns]:
338 if kind in cwdrelativepatternkinds:
339 if kind in cwdrelativepatternkinds:
339 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
340 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
340 elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
341 elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
341 pat = util.normpath(pat)
342 pat = util.normpath(pat)
342 elif kind in (b'listfile', b'listfile0'):
343 elif kind in (b'listfile', b'listfile0'):
343 try:
344 try:
344 files = util.readfile(pat)
345 files = util.readfile(pat)
345 if kind == b'listfile0':
346 if kind == b'listfile0':
346 files = files.split(b'\0')
347 files = files.split(b'\0')
347 else:
348 else:
348 files = files.splitlines()
349 files = files.splitlines()
349 files = [f for f in files if f]
350 files = [f for f in files if f]
350 except EnvironmentError:
351 except EnvironmentError:
351 raise error.Abort(_(b"unable to read file list (%s)") % pat)
352 raise error.Abort(_(b"unable to read file list (%s)") % pat)
352 for k, p, source in _donormalize(
353 for k, p, source in _donormalize(
353 files, default, root, cwd, auditor, warn
354 files, default, root, cwd, auditor, warn
354 ):
355 ):
355 kindpats.append((k, p, pat))
356 kindpats.append((k, p, pat))
356 continue
357 continue
357 elif kind == b'include':
358 elif kind == b'include':
358 try:
359 try:
359 fullpath = os.path.join(root, util.localpath(pat))
360 fullpath = os.path.join(root, util.localpath(pat))
360 includepats = readpatternfile(fullpath, warn)
361 includepats = readpatternfile(fullpath, warn)
361 for k, p, source in _donormalize(
362 for k, p, source in _donormalize(
362 includepats, default, root, cwd, auditor, warn
363 includepats, default, root, cwd, auditor, warn
363 ):
364 ):
364 kindpats.append((k, p, source or pat))
365 kindpats.append((k, p, source or pat))
365 except error.Abort as inst:
366 except error.Abort as inst:
366 raise error.Abort(
367 raise error.Abort(
367 b'%s: %s'
368 b'%s: %s'
368 % (
369 % (
369 pat,
370 pat,
370 inst.message,
371 inst.message,
371 ) # pytype: disable=unsupported-operands
372 ) # pytype: disable=unsupported-operands
372 )
373 )
373 except IOError as inst:
374 except IOError as inst:
374 if warn:
375 if warn:
375 warn(
376 warn(
376 _(b"skipping unreadable pattern file '%s': %s\n")
377 _(b"skipping unreadable pattern file '%s': %s\n")
377 % (pat, stringutil.forcebytestr(inst.strerror))
378 % (pat, stringutil.forcebytestr(inst.strerror))
378 )
379 )
379 continue
380 continue
380 # else: re or relre - which cannot be normalized
381 # else: re or relre - which cannot be normalized
381 kindpats.append((kind, pat, b''))
382 kindpats.append((kind, pat, b''))
382 return kindpats
383 return kindpats
383
384
384
385
class basematcher(object):
    """Common base of the matcher classes.

    On its own it matches no file, lists no explicit files, and asks for
    every directory to be visited; subclasses override what they need.
    """

    def __init__(self, badfn=None):
        # Only shadow the bad() method when a callback was actually given.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        """Calling the matcher is shorthand for ``matchfn(fn)``."""
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        """Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message."""

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        """Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots"""
        return self._files

    @propertycache
    def _fileset(self):
        # Set view of _files, for the membership test in exact().
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        """Return whether ``f`` matches; the base matcher matches nothing."""
        return False

    def visitdir(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        """
        return True

    def visitchildrenset(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        """
        return b'this'

    def always(self):
        """Matcher will match everything and .files() will be empty --
        optimization might be possible."""
        return False

    def isexact(self):
        """Matcher will match exactly the list of files in .files() --
        optimization might be possible."""
        return False

    def prefix(self):
        """Matcher will match the paths in .files() recursively --
        optimization might be possible."""
        return False

    def anypats(self):
        """None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult."""
        return not (self.always() or self.isexact() or self.prefix())
499
500
500
501
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        """This matcher does match everything."""
        return True

    def matchfn(self, f):
        """Every file name matches."""
        return True

    def visitdir(self, dir):
        """Every directory and all of its subdirectories are to be visited."""
        return b'all'

    def visitchildrenset(self, dir):
        """Same answer as visitdir(): b'all' for every directory."""
        return b'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
521
522
522
523
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        """Reported as exact so callers may take the exact-match fast path."""
        return True

    def prefix(self):
        """Reported as prefix so callers may take the prefix fast path."""
        return True

    def visitdir(self, dir):
        """No directory needs to be visited."""
        return False

    def visitchildrenset(self, dir):
        """No child of any directory needs to be visited."""
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
548
549
549
550
class predicatematcher(basematcher):
    """Wrap a plain boolean function so it can be used as a matcher.

    `predfn` becomes this matcher's `matchfn`. `predrepr`, when given, is
    passed to `stringutil.buildrepr` to describe the predicate in `repr()`.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            # No usable description was supplied; fall back to the repr of
            # the predicate function itself.
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): 'predicatenmatcher' looks like a typo for
        # 'predicatematcher'. It is kept byte-for-byte because the repr may
        # be matched by existing expected output -- confirm before changing.
        return b'<predicatenmatcher pred=%s>' % described
564
565
565
566
def path_or_parents_in_set(path, prefix_set):
    """Returns True if `path` (or any parent of `path`) is in `prefix_set`.

    `path` and the members of `prefix_set` are '/'-separated byte strings.
    An empty `prefix_set` never matches; a `prefix_set` containing b''
    matches every path.
    """
    size = len(prefix_set)
    if size == 0:
        return False
    if path in prefix_set:
        return True
    # If there's more than 5 paths in prefix_set, it's *probably* quicker to
    # "walk up" the directory hierarchy instead, with the assumption that most
    # directory hierarchies are relatively shallow and hash lookup is cheap.
    if size > 5:
        return any(
            parentdir in prefix_set for parentdir in pathutil.finddirs(path)
        )

    # FIXME: Ideally we'd never get to this point if this is the case - we'd
    # recognize ourselves as an 'always' matcher and skip this.
    if b'' in prefix_set:
        return True

    for pf in prefix_set:
        end = len(pf)
        # `pf` is a parent of `path` when `path` starts with `pf` and the
        # next byte is the separator. A one-byte slice compares as bytes on
        # both Python 2 and Python 3, and yields b'' (never IndexError) when
        # `path` is not longer than `pf`.
        if path[end : end + 1] == b'/' and path.startswith(pf):
            return True
    return False
594
595
595
596
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        # '$' is the suffix handed to _buildmatch for the patterns.
        self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)

    @propertycache
    def _dirs(self):
        # Memoized set built from pathutil.dirs() over the explicit files.
        return set(pathutil.dirs(self._fileset))

    def prefix(self):
        return self._prefix

    def visitdir(self, dir):
        """Return b'all', True or False for `dir`.

        b'all' is returned only for a prefix matcher whose file set contains
        `dir` itself.
        """
        if self._prefix and dir in self._fileset:
            return b'all'
        if dir in self._dirs:
            return True
        return path_or_parents_in_set(dir, self._fileset)

    def visitchildrenset(self, dir):
        """Translate the visitdir() answer into visitchildrenset() terms.

        A false answer becomes an empty set, True becomes b'this', and
        b'all' is passed through.
        """
        visit = self.visitdir(dir)
        if not visit:
            return set()
        if visit is True:
            return b'this'
        assert visit == b'all'
        return b'all'

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
660
661
661
662
662 # This is basically a reimplementation of pathutil.dirs that stores the
663 # This is basically a reimplementation of pathutil.dirs that stores the
663 # children instead of just a count of them, plus a small optional optimization
664 # children instead of just a count of them, plus a small optional optimization
664 # to avoid some directories we don't need.
665 # to avoid some directories we don't need.
665 class _dirchildren(object):
666 class _dirchildren(object):
666 def __init__(self, paths, onlyinclude=None):
667 def __init__(self, paths, onlyinclude=None):
667 self._dirs = {}
668 self._dirs = {}
668 self._onlyinclude = onlyinclude or []
669 self._onlyinclude = onlyinclude or []
669 addpath = self.addpath
670 addpath = self.addpath
670 for f in paths:
671 for f in paths:
671 addpath(f)
672 addpath(f)
672
673
673 def addpath(self, path):
674 def addpath(self, path):
674 if path == b'':
675 if path == b'':
675 return
676 return
676 dirs = self._dirs
677 dirs = self._dirs
677 findsplitdirs = _dirchildren._findsplitdirs
678 findsplitdirs = _dirchildren._findsplitdirs
678 for d, b in findsplitdirs(path):
679 for d, b in findsplitdirs(path):
679 if d not in self._onlyinclude:
680 if d not in self._onlyinclude:
680 continue
681 continue
681 dirs.setdefault(d, set()).add(b)
682 dirs.setdefault(d, set()).add(b)
682
683
683 @staticmethod
684 @staticmethod
684 def _findsplitdirs(path):
685 def _findsplitdirs(path):
685 # yields (dirname, basename) tuples, walking back to the root. This is
686 # yields (dirname, basename) tuples, walking back to the root. This is
686 # very similar to pathutil.finddirs, except:
687 # very similar to pathutil.finddirs, except:
687 # - produces a (dirname, basename) tuple, not just 'dirname'
688 # - produces a (dirname, basename) tuple, not just 'dirname'
688 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
689 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
689 # slash.
690 # slash.
690 oldpos = len(path)
691 oldpos = len(path)
691 pos = path.rfind(b'/')
692 pos = path.rfind(b'/')
692 while pos != -1:
693 while pos != -1:
693 yield path[:pos], path[pos + 1 : oldpos]
694 yield path[:pos], path[pos + 1 : oldpos]
694 oldpos = pos
695 oldpos = pos
695 pos = path.rfind(b'/', 0, pos)
696 pos = path.rfind(b'/', 0, pos)
696 yield b'', path[:oldpos]
697 yield b'', path[:oldpos]
697
698
698 def get(self, path):
699 def get(self, path):
699 return self._dirs.get(path, set())
700 return self._dirs.get(path, set())
700
701
701
702
class includematcher(basematcher):
    """Matcher built from include patterns (`kindpats`) under `root`."""

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)
        if rustmod is not None:
            # We need to pass the patterns to Rust because they can contain
            # patterns from the user interface
            self._kindpats = kindpats
        self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._prefix = _prefix(kindpats)

        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots: directories which are recursively included.
        self._roots = set(roots)
        # dirs: directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents: directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitdir(self, dir):
        """Return b'all', True or False for `dir`."""
        if self._prefix and dir in self._roots:
            return b'all'
        if dir in self._dirs or dir in self._parents:
            return True
        return path_or_parents_in_set(dir, self._roots)

    def visitchildrenset(self, dir):
        """Return b'all', b'this', or a set of child names for `dir`."""
        if self._prefix and dir in self._roots:
            return b'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if (
            b'' in self._roots
            or dir in self._dirs
            or path_or_parents_in_set(dir, self._roots)
        ):
            return b'this'

        if dir not in self._parents:
            return set()
        # `dir` only matters as a stepping stone: report just the children
        # recorded for it.
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
761
762
762
763
class exactmatcher(basematcher):
    r"""Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    """

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # A list is stored as given; any other iterable is copied to a list.
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # Memoized set built from pathutil.dirs() over the file set.
        return set(pathutil.dirs(self._fileset))

    @propertycache
    def _visitchildrenset_candidates(self):
        """A memoized set of candidates for visitchildrenset."""
        # '-' binds tighter than '|': b'' is removed from the directories
        # only, and the result is then unioned with the file set.
        return self._fileset | (self._dirs - {b''})

    @propertycache
    def _sorted_visitchildrenset_candidates(self):
        """A memoized sorted list of candidates for visitchildrenset."""
        return sorted(self._visitchildrenset_candidates)

    def isexact(self):
        return True

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        """Return the set of immediate child names of `dir` to visit.

        An empty set is returned when there are no files or `dir` is not
        one of the known directories.
        """
        if not self._fileset or dir not in self._dirs:
            return set()

        if dir == b'':
            names = self._visitchildrenset_candidates
        else:
            ordered = self._sorted_visitchildrenset_candidates
            prefix = dir + b'/'
            # First path that sorts after everything starting with `prefix`:
            # swap the trailing '/' for whatever byte follows '/' (we could
            # probably assume that is '0', but let's not).
            upper = dir + encoding.strtolocal(chr(ord(b'/') + 1))
            # Bisect to the first element >= prefix. This should always find
            # at least one element (the assert below fires if it does not).
            start = bisect.bisect_left(ordered, prefix)
            # Bisect to the first element >= upper.
            stop = bisect.bisect_left(ordered, upper, lo=start)
            skip = len(prefix)
            names = {path[skip:] for path in ordered[start:stop]}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
        # subdir will be in there without a slash.
        children = {name for name in names if b'/' not in name}
        # We really do not expect this to be empty, since that would imply
        # that there's something in _dirs that didn't have a file in _fileset.
        assert children
        return children

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
826
851
827
852
class differencematcher(basematcher):
    """Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        # Only m1's callbacks are used.
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        """Return b'all', True or False for `dir`.

        m2 is asked about `dir` exactly once and its answer decides how
        m1's answer is reported.
        """
        m2_visit = self._m2.visitdir(dir)
        if m2_visit == b'all':
            # m2 excludes everything below dir.
            return False
        if not m2_visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        # m2 matches something below dir, so never report 'all'.
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        """Return b'all', b'this', or a set of child names for `dir`."""
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
895
920
896
921
def intersectmatchers(m1, m2):
    """Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.

    When either argument is None the result is `m1 or m2`. When one matcher
    reports always(), a shallow copy of the other one is returned instead of
    building an intersectionmatcher.
    """
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        # m1 contributes nothing to the match itself, but its callbacks must
        # still win, so graft them onto a copy of m2.
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        combined = copy.copy(m2)
        combined.bad = m1.bad
        combined.traversedir = m1.traversedir
        return combined

    if m2.always():
        # The copy of m1 already carries m1's own callbacks.
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
915
940
916
941
class intersectionmatcher(basematcher):
    """Matches a file only when both wrapped matchers match it.

    The bad and traversedir callbacks are taken from m1.
    """

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()
        # List the files of whichever side is exact (m1 when both are) and
        # filter them through the other side.
        if self._m1.isexact():
            exact, other = self._m1, self._m2
        else:
            exact, other = self._m2, self._m1
        return [f for f in exact.files() if other(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        """Return b'all', True or False for `dir`."""
        visit1 = self._m1.visitdir(dir)
        if visit1 == b'all':
            # m1 places no restriction below dir; m2's answer stands as-is.
            return self._m2.visitdir(dir)
        if not visit1:
            return False
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        """Return b'all', b'this', or a set of child names for `dir`."""
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side means the other side's answer is the result.
        if m1_set == b'all':
            return m2_set
        if m2_set == b'all':
            return m1_set

        if b'this' in (m1_set, m2_set):
            return b'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set.intersection(m2_set)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
976
1001
977
1002
978 class subdirmatcher(basematcher):
1003 class subdirmatcher(basematcher):
979 """Adapt a matcher to work on a subdirectory only.
1004 """Adapt a matcher to work on a subdirectory only.
980
1005
981 The paths are remapped to remove/insert the path as needed:
1006 The paths are remapped to remove/insert the path as needed:
982
1007
983 >>> from . import pycompat
1008 >>> from . import pycompat
984 >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
1009 >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
985 >>> m2 = subdirmatcher(b'sub', m1)
1010 >>> m2 = subdirmatcher(b'sub', m1)
986 >>> m2(b'a.txt')
1011 >>> m2(b'a.txt')
987 False
1012 False
988 >>> m2(b'b.txt')
1013 >>> m2(b'b.txt')
989 True
1014 True
990 >>> m2.matchfn(b'a.txt')
1015 >>> m2.matchfn(b'a.txt')
991 False
1016 False
992 >>> m2.matchfn(b'b.txt')
1017 >>> m2.matchfn(b'b.txt')
993 True
1018 True
994 >>> m2.files()
1019 >>> m2.files()
995 ['b.txt']
1020 ['b.txt']
996 >>> m2.exact(b'b.txt')
1021 >>> m2.exact(b'b.txt')
997 True
1022 True
998 >>> def bad(f, msg):
1023 >>> def bad(f, msg):
999 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
1024 ... print(pycompat.sysstr(b"%s: %s" % (f, msg)))
1000 >>> m1.bad = bad
1025 >>> m1.bad = bad
1001 >>> m2.bad(b'x.txt', b'No such file')
1026 >>> m2.bad(b'x.txt', b'No such file')
1002 sub/x.txt: No such file
1027 sub/x.txt: No such file
1003 """
1028 """
1004
1029
1005 def __init__(self, path, matcher):
1030 def __init__(self, path, matcher):
1006 super(subdirmatcher, self).__init__()
1031 super(subdirmatcher, self).__init__()
1007 self._path = path
1032 self._path = path
1008 self._matcher = matcher
1033 self._matcher = matcher
1009 self._always = matcher.always()
1034 self._always = matcher.always()
1010
1035
1011 self._files = [
1036 self._files = [
1012 f[len(path) + 1 :]
1037 f[len(path) + 1 :]
1013 for f in matcher._files
1038 for f in matcher._files
1014 if f.startswith(path + b"/")
1039 if f.startswith(path + b"/")
1015 ]
1040 ]
1016
1041
1017 # If the parent repo had a path to this subrepo and the matcher is
1042 # If the parent repo had a path to this subrepo and the matcher is
1018 # a prefix matcher, this submatcher always matches.
1043 # a prefix matcher, this submatcher always matches.
1019 if matcher.prefix():
1044 if matcher.prefix():
1020 self._always = any(f == path for f in matcher._files)
1045 self._always = any(f == path for f in matcher._files)
1021
1046
1022 def bad(self, f, msg):
1047 def bad(self, f, msg):
1023 self._matcher.bad(self._path + b"/" + f, msg)
1048 self._matcher.bad(self._path + b"/" + f, msg)
1024
1049
1025 def matchfn(self, f):
1050 def matchfn(self, f):
1026 # Some information is lost in the superclass's constructor, so we
1051 # Some information is lost in the superclass's constructor, so we
1027 # can not accurately create the matching function for the subdirectory
1052 # can not accurately create the matching function for the subdirectory
1028 # from the inputs. Instead, we override matchfn() and visitdir() to
1053 # from the inputs. Instead, we override matchfn() and visitdir() to
1029 # call the original matcher with the subdirectory path prepended.
1054 # call the original matcher with the subdirectory path prepended.
1030 return self._matcher.matchfn(self._path + b"/" + f)
1055 return self._matcher.matchfn(self._path + b"/" + f)
1031
1056
def visitdir(self, dir):
    """Delegate ``visitdir`` to the wrapped matcher.

    ``dir`` is relative to ``self._path``; the empty string stands for
    ``self._path`` itself.
    """
    if dir == b'':
        fulldir = self._path
    else:
        fulldir = self._path + b"/" + dir
    return self._matcher.visitdir(fulldir)
1038
1063
def visitchildrenset(self, dir):
    """Delegate ``visitchildrenset`` to the wrapped matcher.

    ``dir`` is relative to ``self._path``; the empty string stands for
    ``self._path`` itself.
    """
    if dir == b'':
        fulldir = self._path
    else:
        fulldir = self._path + b"/" + dir
    return self._matcher.visitchildrenset(fulldir)
1045
1070
def always(self):
    """Return the ``_always`` flag computed in the constructor."""
    return self._always
1048
1073
def prefix(self):
    """Return whether the wrapped matcher is a prefix matcher and this
    matcher is not an always-matcher."""
    return self._matcher.prefix() and not self._always
1051
1076
@encoding.strmethod
def __repr__(self):
    """Return a bytes description naming the path and the wrapped matcher."""
    return b'<subdirmatcher path=%r, matcher=%r>' % (
        self._path,
        self._matcher,
    )
1058
1083
1059
1084
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        """Store ``path`` and ``matcher``.

        Raises error.ProgrammingError when ``path`` is empty.
        """
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        # The wrapped matcher's files, each prefixed with ``path/``.
        return [self._pathprefix + f for f in self._matcher._files]

    def matchfn(self, f):
        """Match ``f`` only if it is below the prefix and the wrapped
        matcher accepts the remainder of the path."""
        if not f.startswith(self._pathprefix):
            return False
        return self._matcher.matchfn(f[len(self._pathprefix) :])

    @propertycache
    def _pathdirs(self):
        # Set built from pathutil.finddirs() applied to the prefix path.
        return set(pathutil.finddirs(self._path))

    def visitdir(self, dir):
        """Delegate for directories at or below the prefix; otherwise
        report whether ``dir`` is in ``_pathdirs``."""
        if dir == self._path:
            return self._matcher.visitdir(b'')
        if dir.startswith(self._pathprefix):
            return self._matcher.visitdir(dir[len(self._pathprefix) :])
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        """Delegate for directories at or below the prefix; return
        ``b'this'`` for directories in ``_pathdirs`` and an empty set for
        everything else."""
        if dir == self._path:
            return self._matcher.visitchildrenset(b'')
        if dir.startswith(self._pathprefix):
            return self._matcher.visitchildrenset(dir[len(self._pathprefix) :])
        if dir in self._pathdirs:
            return b'this'
        return set()

    def isexact(self):
        """Return the wrapped matcher's ``isexact()``."""
        return self._matcher.isexact()

    def prefix(self):
        """Return the wrapped matcher's ``prefix()``."""
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        return b'<prefixdirmatcher path=%r, matcher=%r>' % (
            pycompat.bytestr(self._path),
            self._matcher,
        )
1141
1166
1142
1167
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (bad, traversedir) are taken from the first
    matcher.
    """

    def __init__(self, matchers):
        """Store ``matchers``; ``traversedir`` is copied from the first."""
        m1 = matchers[0]
        super(unionmatcher, self).__init__()
        self.traversedir = m1.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        """Return True as soon as any of the matchers accepts ``f``."""
        return any(match(f) for match in self._matchers)

    def visitdir(self, dir):
        """Combine the matchers' ``visitdir`` results.

        ``b'all'`` from any matcher is returned immediately; otherwise
        the individual results are OR-ed together.
        """
        r = False
        for m in self._matchers:
            v = m.visitdir(dir)
            if v == b'all':
                return v
            r |= v
        return r

    def visitchildrenset(self, dir):
        """Combine the matchers' ``visitchildrenset`` results.

        ``b'all'`` wins outright, then ``b'this'``; otherwise the union
        of the returned sets is the result.
        """
        r = set()
        this = False
        for m in self._matchers:
            v = m.visitchildrenset(dir)
            if not v:
                continue
            if v == b'all':
                return v
            if this or v == b'this':
                this = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(v, set)
            r |= v
        if this:
            return b'this'
        return r

    @encoding.strmethod
    def __repr__(self):
        return b'<unionmatcher matchers=%r>' % self._matchers
1193
1218
1194
1219
def patkind(pattern, default=None):
    r"""If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise return ``default``.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    """
    return _patsplit(pattern, default)[0]
1209
1234
1210
1235
def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns ``(kind, pat)`` when the text before the first ``:`` is one of
    ``allpatternkinds``; otherwise ``(default, pattern)`` with the pattern
    left untouched.
    """
    kind, sep, pat = pattern.partition(b':')
    # ``sep`` is empty when the pattern contains no colon at all.
    if sep and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1219
1244
1220
1245
def _globre(pat):
    r"""Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    """
    i, n = 0, len(pat)
    # Collect output fragments and join once at the end instead of
    # repeatedly concatenating bytes objects.
    out = []
    group = 0  # nesting depth of currently open '{' groups
    escape = util.stringutil.regexbytesescapemap.get

    def peek():
        # Next unread character, or a falsy value at end of input.
        return i < n and pat[i : i + 1]

    while i < n:
        c = pat[i : i + 1]
        i += 1
        if c not in b'*?[{},\\':
            out.append(escape(c, c))
        elif c == b'*':
            if peek() == b'*':
                i += 1
                if peek() == b'/':
                    # '**/' also matches zero directories
                    i += 1
                    out.append(b'(?:.*/)?')
                else:
                    out.append(b'.*')
            else:
                out.append(b'[^/]*')
        elif c == b'?':
            out.append(b'.')
        elif c == b'[':
            # Look for the closing ']'; a ']' or '!' directly after '['
            # is part of the class and does not terminate it.
            j = i
            if j < n and pat[j : j + 1] in b'!]':
                j += 1
            while j < n and pat[j : j + 1] != b']':
                j += 1
            if j >= n:
                # unterminated class: treat '[' literally
                out.append(b'\\[')
            else:
                stuff = pat[i:j].replace(b'\\', b'\\\\')
                i = j + 1
                if stuff[0:1] == b'!':
                    stuff = b'^' + stuff[1:]
                elif stuff[0:1] == b'^':
                    stuff = b'\\' + stuff
                out.append(b'[%s]' % stuff)
        elif c == b'{':
            group += 1
            out.append(b'(?:')
        elif c == b'}' and group:
            out.append(b')')
            group -= 1
        elif c == b',' and group:
            out.append(b'|')
        elif c == b'\\':
            p = peek()
            if p:
                i += 1
                out.append(escape(p, p))
            else:
                out.append(escape(c, c))
        else:
            # '}' or ',' outside of any group
            out.append(escape(c, c))
    return b''.join(out)
1303
1328
1304
1329
def _regex(kind, pat, globsuffix):
    """Convert a (normalized) pattern of any kind into a
    regular expression.
    globsuffix is appended to the regexp of globs.

    Raises error.ProgrammingError for a kind not handled here.
    """
    if not pat and kind in (b'glob', b'relpath'):
        return b''
    if kind == b're':
        return pat
    if kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        return util.stringutil.reescape(pat) + b'(?:/|$)'
    if kind == b'rootfilesin':
        if pat == b'.':
            escaped = b''
        else:
            # Pattern is a directory name.
            escaped = util.stringutil.reescape(pat) + b'/'
        # Anything after the pattern must be a non-directory.
        return escaped + b'[^/]+$'
    if kind == b'relglob':
        globre = _globre(pat)
        if globre.startswith(b'[^/]*'):
            # When pat has the form *XYZ (common), make the returned regex more
            # legible by returning the regex for **XYZ instead of **/*XYZ.
            return b'.*' + globre[len(b'[^/]*') :] + globsuffix
        return b'(?:|.*/)' + globre + globsuffix
    if kind == b'relre':
        if pat.startswith(b'^'):
            return pat
        return b'.*' + pat
    if kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1339
1364
1340
1365
def _buildmatch(kindpats, globsuffix, root):
    """Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs."""
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # Matchers for subincludes are built on first use and cached by
        # prefix.
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if f.startswith(prefix):
                    mf = submatchers.get(prefix)
                    if mf is None:
                        mf = match(*matcherargs)
                        submatchers[prefix] = mf

                    if mf(f[len(prefix) :]):
                        return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            # Only rootfilesin patterns: a set lookup on the file's parent
            # directory replaces the regular expression.
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                i = f.rfind(b'/')
                if i >= 0:
                    parent = f[:i]
                else:
                    parent = b'.'
                return parent in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            matchfuncs.append(mf)
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
            matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    else:
        return regex, lambda f: any(mf(f) for mf in matchfuncs)
1387
1412
1388
1413
# Upper bound, in bytes, that _buildregexmatch() allows for a single
# regular expression and for each group of joined expressions.
MAX_RE_SIZE = 20000


def _joinregexes(regexps):
    """gather multiple regular expressions into a single one

    The expressions are joined as alternatives with ``|``.
    """
    return b'|'.join(regexps)
1395
1420
1396
1421
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort when a single pattern exceeds MAX_RE_SIZE or when a
    pattern does not compile.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        allgroups = []
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        # Split the expressions into consecutive groups so that no joined
        # group grows beyond MAX_RE_SIZE.
        startidx = 0
        groupsize = 0
        for idx, r in enumerate(regexps):
            piecesize = len(r)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            elif (groupsize + piecesize) > MAX_RE_SIZE:
                group = regexps[startidx:idx]
                allgroups.append(_joinregexes(group))
                startidx = idx
                groupsize = 0
            # +1 accounts for the '|' separator added when joining
            groupsize += piecesize + 1

        if startidx == 0:
            # everything fits in a single expression
            matcher = _rematcher(fullregexp)
            func = lambda s: bool(matcher(s))
        else:
            group = regexps[startidx:]
            allgroups.append(_joinregexes(group))
            allmatchers = [_rematcher(g) for g in allgroups]
            func = lambda s: any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # Compile each pattern on its own to find out which one is invalid.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(
                        _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                    )
                else:
                    raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_(b"invalid pattern"))
1449
1474
1450
1475
def _patternrootsanddirs(kindpats):
    """Returns roots and directories corresponding to each pattern.

    This calculates the roots and directories exactly matching the patterns and
    returns a tuple of (roots, dirs) for each. It does not return other
    directories which may also need to be considered, like the parent
    directories.
    """
    roots = []
    dirs = []
    for kind, pat, source in kindpats:
        if kind in (b'glob', b'rootglob'):  # find the non-glob prefix
            root = []
            for p in pat.split(b'/'):
                if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
                    break
                root.append(p)
            roots.append(b'/'.join(root))
        elif kind in (b'relpath', b'path'):
            if pat == b'.':
                pat = b''
            roots.append(pat)
        elif kind in (b'rootfilesin',):
            if pat == b'.':
                pat = b''
            dirs.append(pat)
        else:  # relglob, re, relre
            roots.append(b'')
    return roots, dirs
1480
1505
1481
1506
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    # Only the roots half of the (roots, dirs) pair is wanted here.
    return _patternrootsanddirs(kindpats)[0]
1486
1511
1487
1512
def _rootsdirsandparents(kindpats):
    """Returns roots and exact directories from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    """
    r, d = _patternrootsanddirs(kindpats)

    p = set()
    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    p.update(pathutil.dirs(d))
    p.update(pathutil.dirs(r))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return r, d, p
1530
1555
1531
1556
def _explicitfiles(kindpats):
    """Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    """
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    filable = [kp for kp in kindpats if kp[0] not in (b'rootfilesin',)]
    return _roots(filable)
1544
1569
1545
1570
def _prefix(kindpats):
    '''Whether all the patterns match a prefix (i.e. recursively)'''
    # True only when every pattern is of kind 'path' or 'relpath'; an empty
    # list therefore yields True.
    return all(kind in (b'path', b'relpath') for kind, pat, source in kindpats)
1552
1577
1553
1578
1554 _commentre = None
1579 _commentre = None
1555
1580
1556
1581
1557 def readpatternfile(filepath, warn, sourceinfo=False):
1582 def readpatternfile(filepath, warn, sourceinfo=False):
1558 """parse a pattern file, returning a list of
1583 """parse a pattern file, returning a list of
1559 patterns. These patterns should be given to compile()
1584 patterns. These patterns should be given to compile()
1560 to be validated and converted into a match function.
1585 to be validated and converted into a match function.
1561
1586
1562 trailing white space is dropped.
1587 trailing white space is dropped.
1563 the escape character is backslash.
1588 the escape character is backslash.
1564 comments start with #.
1589 comments start with #.
1565 empty lines are skipped.
1590 empty lines are skipped.
1566
1591
1567 lines can be of the following formats:
1592 lines can be of the following formats:
1568
1593
1569 syntax: regexp # defaults following lines to non-rooted regexps
1594 syntax: regexp # defaults following lines to non-rooted regexps
1570 syntax: glob # defaults following lines to non-rooted globs
1595 syntax: glob # defaults following lines to non-rooted globs
1571 re:pattern # non-rooted regular expression
1596 re:pattern # non-rooted regular expression
1572 glob:pattern # non-rooted glob
1597 glob:pattern # non-rooted glob
1573 rootglob:pat # rooted glob (same root as ^ in regexps)
1598 rootglob:pat # rooted glob (same root as ^ in regexps)
1574 pattern # pattern of the current default type
1599 pattern # pattern of the current default type
1575
1600
1576 if sourceinfo is set, returns a list of tuples:
1601 if sourceinfo is set, returns a list of tuples:
1577 (pattern, lineno, originalline).
1602 (pattern, lineno, originalline).
1578 This is useful to debug ignore patterns.
1603 This is useful to debug ignore patterns.
1579 """
1604 """
1580
1605
1581 syntaxes = {
1606 syntaxes = {
1582 b're': b'relre:',
1607 b're': b'relre:',
1583 b'regexp': b'relre:',
1608 b'regexp': b'relre:',
1584 b'glob': b'relglob:',
1609 b'glob': b'relglob:',
1585 b'rootglob': b'rootglob:',
1610 b'rootglob': b'rootglob:',
1586 b'include': b'include',
1611 b'include': b'include',
1587 b'subinclude': b'subinclude',
1612 b'subinclude': b'subinclude',
1588 }
1613 }
1589 syntax = b'relre:'
1614 syntax = b'relre:'
1590 patterns = []
1615 patterns = []
1591
1616
1592 fp = open(filepath, b'rb')
1617 fp = open(filepath, b'rb')
1593 for lineno, line in enumerate(util.iterfile(fp), start=1):
1618 for lineno, line in enumerate(util.iterfile(fp), start=1):
1594 if b"#" in line:
1619 if b"#" in line:
1595 global _commentre
1620 global _commentre
1596 if not _commentre:
1621 if not _commentre:
1597 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1622 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1598 # remove comments prefixed by an even number of escapes
1623 # remove comments prefixed by an even number of escapes
1599 m = _commentre.search(line)
1624 m = _commentre.search(line)
1600 if m:
1625 if m:
1601 line = line[: m.end(1)]
1626 line = line[: m.end(1)]
1602 # fixup properly escaped comments that survived the above
1627 # fixup properly escaped comments that survived the above
1603 line = line.replace(b"\\#", b"#")
1628 line = line.replace(b"\\#", b"#")
1604 line = line.rstrip()
1629 line = line.rstrip()
1605 if not line:
1630 if not line:
1606 continue
1631 continue
1607
1632
1608 if line.startswith(b'syntax:'):
1633 if line.startswith(b'syntax:'):
1609 s = line[7:].strip()
1634 s = line[7:].strip()
1610 try:
1635 try:
1611 syntax = syntaxes[s]
1636 syntax = syntaxes[s]
1612 except KeyError:
1637 except KeyError:
1613 if warn:
1638 if warn:
1614 warn(
1639 warn(
1615 _(b"%s: ignoring invalid syntax '%s'\n") % (filepath, s)
1640 _(b"%s: ignoring invalid syntax '%s'\n") % (filepath, s)
1616 )
1641 )
1617 continue
1642 continue
1618
1643
1619 linesyntax = syntax
1644 linesyntax = syntax
1620 for s, rels in pycompat.iteritems(syntaxes):
1645 for s, rels in pycompat.iteritems(syntaxes):
1621 if line.startswith(rels):
1646 if line.startswith(rels):
1622 linesyntax = rels
1647 linesyntax = rels
1623 line = line[len(rels) :]
1648 line = line[len(rels) :]
1624 break
1649 break
1625 elif line.startswith(s + b':'):
1650 elif line.startswith(s + b':'):
1626 linesyntax = rels
1651 linesyntax = rels
1627 line = line[len(s) + 1 :]
1652 line = line[len(s) + 1 :]
1628 break
1653 break
1629 if sourceinfo:
1654 if sourceinfo:
1630 patterns.append((linesyntax + line, lineno, line))
1655 patterns.append((linesyntax + line, lineno, line))
1631 else:
1656 else:
1632 patterns.append(linesyntax + line)
1657 patterns.append(linesyntax + line)
1633 fp.close()
1658 fp.close()
1634 return patterns
1659 return patterns
General Comments 0
You need to be logged in to leave comments. Login now