##// END OF EJS Templates
rust-filepatterns: remove bridge code for filepatterns-related functions...
Raphaël Gomès -
r44589:3bd77c64 default
parent child Browse files
Show More
@@ -1,1622 +1,1601 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import copy
10 import copy
11 import itertools
11 import itertools
12 import os
12 import os
13 import re
13 import re
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import open
16 from .pycompat import open
17 from . import (
17 from . import (
18 encoding,
18 encoding,
19 error,
19 error,
20 pathutil,
20 pathutil,
21 policy,
21 policy,
22 pycompat,
22 pycompat,
23 util,
23 util,
24 )
24 )
25 from .utils import stringutil
25 from .utils import stringutil
26
26
27 rustmod = policy.importrust('filepatterns')
27 rustmod = policy.importrust('dirstate')
28
28
29 allpatternkinds = (
29 allpatternkinds = (
30 b're',
30 b're',
31 b'glob',
31 b'glob',
32 b'path',
32 b'path',
33 b'relglob',
33 b'relglob',
34 b'relpath',
34 b'relpath',
35 b'relre',
35 b'relre',
36 b'rootglob',
36 b'rootglob',
37 b'listfile',
37 b'listfile',
38 b'listfile0',
38 b'listfile0',
39 b'set',
39 b'set',
40 b'include',
40 b'include',
41 b'subinclude',
41 b'subinclude',
42 b'rootfilesin',
42 b'rootfilesin',
43 )
43 )
44 cwdrelativepatternkinds = (b'relpath', b'glob')
44 cwdrelativepatternkinds = (b'relpath', b'glob')
45
45
46 propertycache = util.propertycache
46 propertycache = util.propertycache
47
47
48
48
49 def _rematcher(regex):
49 def _rematcher(regex):
50 '''compile the regexp with the best available regexp engine and return a
50 '''compile the regexp with the best available regexp engine and return a
51 matcher function'''
51 matcher function'''
52 m = util.re.compile(regex)
52 m = util.re.compile(regex)
53 try:
53 try:
54 # slightly faster, provided by facebook's re2 bindings
54 # slightly faster, provided by facebook's re2 bindings
55 return m.test_match
55 return m.test_match
56 except AttributeError:
56 except AttributeError:
57 return m.match
57 return m.match
58
58
59
59
60 def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
60 def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None):
61 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
61 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
62 matchers = []
62 matchers = []
63 other = []
63 other = []
64
64
65 for kind, pat, source in kindpats:
65 for kind, pat, source in kindpats:
66 if kind == b'set':
66 if kind == b'set':
67 if ctx is None:
67 if ctx is None:
68 raise error.ProgrammingError(
68 raise error.ProgrammingError(
69 b"fileset expression with no context"
69 b"fileset expression with no context"
70 )
70 )
71 matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))
71 matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn))
72
72
73 if listsubrepos:
73 if listsubrepos:
74 for subpath in ctx.substate:
74 for subpath in ctx.substate:
75 sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
75 sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn)
76 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
76 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
77 matchers.append(pm)
77 matchers.append(pm)
78
78
79 continue
79 continue
80 other.append((kind, pat, source))
80 other.append((kind, pat, source))
81 return matchers, other
81 return matchers, other
82
82
83
83
84 def _expandsubinclude(kindpats, root):
84 def _expandsubinclude(kindpats, root):
85 '''Returns the list of subinclude matcher args and the kindpats without the
85 '''Returns the list of subinclude matcher args and the kindpats without the
86 subincludes in it.'''
86 subincludes in it.'''
87 relmatchers = []
87 relmatchers = []
88 other = []
88 other = []
89
89
90 for kind, pat, source in kindpats:
90 for kind, pat, source in kindpats:
91 if kind == b'subinclude':
91 if kind == b'subinclude':
92 sourceroot = pathutil.dirname(util.normpath(source))
92 sourceroot = pathutil.dirname(util.normpath(source))
93 pat = util.pconvert(pat)
93 pat = util.pconvert(pat)
94 path = pathutil.join(sourceroot, pat)
94 path = pathutil.join(sourceroot, pat)
95
95
96 newroot = pathutil.dirname(path)
96 newroot = pathutil.dirname(path)
97 matcherargs = (newroot, b'', [], [b'include:%s' % path])
97 matcherargs = (newroot, b'', [], [b'include:%s' % path])
98
98
99 prefix = pathutil.canonpath(root, root, newroot)
99 prefix = pathutil.canonpath(root, root, newroot)
100 if prefix:
100 if prefix:
101 prefix += b'/'
101 prefix += b'/'
102 relmatchers.append((prefix, matcherargs))
102 relmatchers.append((prefix, matcherargs))
103 else:
103 else:
104 other.append((kind, pat, source))
104 other.append((kind, pat, source))
105
105
106 return relmatchers, other
106 return relmatchers, other
107
107
108
108
109 def _kindpatsalwaysmatch(kindpats):
109 def _kindpatsalwaysmatch(kindpats):
110 """Checks whether the kindpats match everything, as e.g.
110 """Checks whether the kindpats match everything, as e.g.
111 'relpath:.' does.
111 'relpath:.' does.
112 """
112 """
113 for kind, pat, source in kindpats:
113 for kind, pat, source in kindpats:
114 if pat != b'' or kind not in [b'relpath', b'glob']:
114 if pat != b'' or kind not in [b'relpath', b'glob']:
115 return False
115 return False
116 return True
116 return True
117
117
118
118
119 def _buildkindpatsmatcher(
119 def _buildkindpatsmatcher(
120 matchercls, root, cwd, kindpats, ctx=None, listsubrepos=False, badfn=None,
120 matchercls, root, cwd, kindpats, ctx=None, listsubrepos=False, badfn=None,
121 ):
121 ):
122 matchers = []
122 matchers = []
123 fms, kindpats = _expandsets(
123 fms, kindpats = _expandsets(
124 cwd, kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn,
124 cwd, kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn,
125 )
125 )
126 if kindpats:
126 if kindpats:
127 m = matchercls(root, kindpats, badfn=badfn)
127 m = matchercls(root, kindpats, badfn=badfn)
128 matchers.append(m)
128 matchers.append(m)
129 if fms:
129 if fms:
130 matchers.extend(fms)
130 matchers.extend(fms)
131 if not matchers:
131 if not matchers:
132 return nevermatcher(badfn=badfn)
132 return nevermatcher(badfn=badfn)
133 if len(matchers) == 1:
133 if len(matchers) == 1:
134 return matchers[0]
134 return matchers[0]
135 return unionmatcher(matchers)
135 return unionmatcher(matchers)
136
136
137
137
138 def match(
138 def match(
139 root,
139 root,
140 cwd,
140 cwd,
141 patterns=None,
141 patterns=None,
142 include=None,
142 include=None,
143 exclude=None,
143 exclude=None,
144 default=b'glob',
144 default=b'glob',
145 auditor=None,
145 auditor=None,
146 ctx=None,
146 ctx=None,
147 listsubrepos=False,
147 listsubrepos=False,
148 warn=None,
148 warn=None,
149 badfn=None,
149 badfn=None,
150 icasefs=False,
150 icasefs=False,
151 ):
151 ):
152 r"""build an object to match a set of file patterns
152 r"""build an object to match a set of file patterns
153
153
154 arguments:
154 arguments:
155 root - the canonical root of the tree you're matching against
155 root - the canonical root of the tree you're matching against
156 cwd - the current working directory, if relevant
156 cwd - the current working directory, if relevant
157 patterns - patterns to find
157 patterns - patterns to find
158 include - patterns to include (unless they are excluded)
158 include - patterns to include (unless they are excluded)
159 exclude - patterns to exclude (even if they are included)
159 exclude - patterns to exclude (even if they are included)
160 default - if a pattern in patterns has no explicit type, assume this one
160 default - if a pattern in patterns has no explicit type, assume this one
161 auditor - optional path auditor
161 auditor - optional path auditor
162 ctx - optional changecontext
162 ctx - optional changecontext
163 listsubrepos - if True, recurse into subrepositories
163 listsubrepos - if True, recurse into subrepositories
164 warn - optional function used for printing warnings
164 warn - optional function used for printing warnings
165 badfn - optional bad() callback for this matcher instead of the default
165 badfn - optional bad() callback for this matcher instead of the default
166 icasefs - make a matcher for wdir on case insensitive filesystems, which
166 icasefs - make a matcher for wdir on case insensitive filesystems, which
167 normalizes the given patterns to the case in the filesystem
167 normalizes the given patterns to the case in the filesystem
168
168
169 a pattern is one of:
169 a pattern is one of:
170 'glob:<glob>' - a glob relative to cwd
170 'glob:<glob>' - a glob relative to cwd
171 're:<regexp>' - a regular expression
171 're:<regexp>' - a regular expression
172 'path:<path>' - a path relative to repository root, which is matched
172 'path:<path>' - a path relative to repository root, which is matched
173 recursively
173 recursively
174 'rootfilesin:<path>' - a path relative to repository root, which is
174 'rootfilesin:<path>' - a path relative to repository root, which is
175 matched non-recursively (will not match subdirectories)
175 matched non-recursively (will not match subdirectories)
176 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
176 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
177 'relpath:<path>' - a path relative to cwd
177 'relpath:<path>' - a path relative to cwd
178 'relre:<regexp>' - a regexp that needn't match the start of a name
178 'relre:<regexp>' - a regexp that needn't match the start of a name
179 'set:<fileset>' - a fileset expression
179 'set:<fileset>' - a fileset expression
180 'include:<path>' - a file of patterns to read and include
180 'include:<path>' - a file of patterns to read and include
181 'subinclude:<path>' - a file of patterns to match against files under
181 'subinclude:<path>' - a file of patterns to match against files under
182 the same directory
182 the same directory
183 '<something>' - a pattern of the specified default type
183 '<something>' - a pattern of the specified default type
184
184
185 >>> def _match(root, *args, **kwargs):
185 >>> def _match(root, *args, **kwargs):
186 ... return match(util.localpath(root), *args, **kwargs)
186 ... return match(util.localpath(root), *args, **kwargs)
187
187
188 Usually a patternmatcher is returned:
188 Usually a patternmatcher is returned:
189 >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
189 >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
190 <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>
190 <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>
191
191
192 Combining 'patterns' with 'include' (resp. 'exclude') gives an
192 Combining 'patterns' with 'include' (resp. 'exclude') gives an
193 intersectionmatcher (resp. a differencematcher):
193 intersectionmatcher (resp. a differencematcher):
194 >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
194 >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
195 <class 'mercurial.match.intersectionmatcher'>
195 <class 'mercurial.match.intersectionmatcher'>
196 >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
196 >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
197 <class 'mercurial.match.differencematcher'>
197 <class 'mercurial.match.differencematcher'>
198
198
199 Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
199 Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
200 >>> _match(b'/foo', b'.', [])
200 >>> _match(b'/foo', b'.', [])
201 <alwaysmatcher>
201 <alwaysmatcher>
202
202
203 The 'default' argument determines which kind of pattern is assumed if a
203 The 'default' argument determines which kind of pattern is assumed if a
204 pattern has no prefix:
204 pattern has no prefix:
205 >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
205 >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
206 <patternmatcher patterns='.*\\.c$'>
206 <patternmatcher patterns='.*\\.c$'>
207 >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
207 >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
208 <patternmatcher patterns='main\\.py(?:/|$)'>
208 <patternmatcher patterns='main\\.py(?:/|$)'>
209 >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
209 >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
210 <patternmatcher patterns='main.py'>
210 <patternmatcher patterns='main.py'>
211
211
212 The primary use of matchers is to check whether a value (usually a file
212 The primary use of matchers is to check whether a value (usually a file
213 name) matches against one of the patterns given at initialization. There
213 name) matches against one of the patterns given at initialization. There
214 are two ways of doing this check.
214 are two ways of doing this check.
215
215
216 >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])
216 >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])
217
217
218 1. Calling the matcher with a file name returns True if any pattern
218 1. Calling the matcher with a file name returns True if any pattern
219 matches that file name:
219 matches that file name:
220 >>> m(b'a')
220 >>> m(b'a')
221 True
221 True
222 >>> m(b'main.c')
222 >>> m(b'main.c')
223 True
223 True
224 >>> m(b'test.py')
224 >>> m(b'test.py')
225 False
225 False
226
226
227 2. Using the exact() method only returns True if the file name matches one
227 2. Using the exact() method only returns True if the file name matches one
228 of the exact patterns (i.e. not re: or glob: patterns):
228 of the exact patterns (i.e. not re: or glob: patterns):
229 >>> m.exact(b'a')
229 >>> m.exact(b'a')
230 True
230 True
231 >>> m.exact(b'main.c')
231 >>> m.exact(b'main.c')
232 False
232 False
233 """
233 """
234 assert os.path.isabs(root)
234 assert os.path.isabs(root)
235 cwd = os.path.join(root, util.localpath(cwd))
235 cwd = os.path.join(root, util.localpath(cwd))
236 normalize = _donormalize
236 normalize = _donormalize
237 if icasefs:
237 if icasefs:
238 dirstate = ctx.repo().dirstate
238 dirstate = ctx.repo().dirstate
239 dsnormalize = dirstate.normalize
239 dsnormalize = dirstate.normalize
240
240
241 def normalize(patterns, default, root, cwd, auditor, warn):
241 def normalize(patterns, default, root, cwd, auditor, warn):
242 kp = _donormalize(patterns, default, root, cwd, auditor, warn)
242 kp = _donormalize(patterns, default, root, cwd, auditor, warn)
243 kindpats = []
243 kindpats = []
244 for kind, pats, source in kp:
244 for kind, pats, source in kp:
245 if kind not in (b're', b'relre'): # regex can't be normalized
245 if kind not in (b're', b'relre'): # regex can't be normalized
246 p = pats
246 p = pats
247 pats = dsnormalize(pats)
247 pats = dsnormalize(pats)
248
248
249 # Preserve the original to handle a case only rename.
249 # Preserve the original to handle a case only rename.
250 if p != pats and p in dirstate:
250 if p != pats and p in dirstate:
251 kindpats.append((kind, p, source))
251 kindpats.append((kind, p, source))
252
252
253 kindpats.append((kind, pats, source))
253 kindpats.append((kind, pats, source))
254 return kindpats
254 return kindpats
255
255
256 if patterns:
256 if patterns:
257 kindpats = normalize(patterns, default, root, cwd, auditor, warn)
257 kindpats = normalize(patterns, default, root, cwd, auditor, warn)
258 if _kindpatsalwaysmatch(kindpats):
258 if _kindpatsalwaysmatch(kindpats):
259 m = alwaysmatcher(badfn)
259 m = alwaysmatcher(badfn)
260 else:
260 else:
261 m = _buildkindpatsmatcher(
261 m = _buildkindpatsmatcher(
262 patternmatcher,
262 patternmatcher,
263 root,
263 root,
264 cwd,
264 cwd,
265 kindpats,
265 kindpats,
266 ctx=ctx,
266 ctx=ctx,
267 listsubrepos=listsubrepos,
267 listsubrepos=listsubrepos,
268 badfn=badfn,
268 badfn=badfn,
269 )
269 )
270 else:
270 else:
271 # It's a little strange that no patterns means to match everything.
271 # It's a little strange that no patterns means to match everything.
272 # Consider changing this to match nothing (probably using nevermatcher).
272 # Consider changing this to match nothing (probably using nevermatcher).
273 m = alwaysmatcher(badfn)
273 m = alwaysmatcher(badfn)
274
274
275 if include:
275 if include:
276 kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
276 kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
277 im = _buildkindpatsmatcher(
277 im = _buildkindpatsmatcher(
278 includematcher,
278 includematcher,
279 root,
279 root,
280 cwd,
280 cwd,
281 kindpats,
281 kindpats,
282 ctx=ctx,
282 ctx=ctx,
283 listsubrepos=listsubrepos,
283 listsubrepos=listsubrepos,
284 badfn=None,
284 badfn=None,
285 )
285 )
286 m = intersectmatchers(m, im)
286 m = intersectmatchers(m, im)
287 if exclude:
287 if exclude:
288 kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
288 kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
289 em = _buildkindpatsmatcher(
289 em = _buildkindpatsmatcher(
290 includematcher,
290 includematcher,
291 root,
291 root,
292 cwd,
292 cwd,
293 kindpats,
293 kindpats,
294 ctx=ctx,
294 ctx=ctx,
295 listsubrepos=listsubrepos,
295 listsubrepos=listsubrepos,
296 badfn=None,
296 badfn=None,
297 )
297 )
298 m = differencematcher(m, em)
298 m = differencematcher(m, em)
299 return m
299 return m
300
300
301
301
302 def exact(files, badfn=None):
302 def exact(files, badfn=None):
303 return exactmatcher(files, badfn=badfn)
303 return exactmatcher(files, badfn=badfn)
304
304
305
305
306 def always(badfn=None):
306 def always(badfn=None):
307 return alwaysmatcher(badfn)
307 return alwaysmatcher(badfn)
308
308
309
309
310 def never(badfn=None):
310 def never(badfn=None):
311 return nevermatcher(badfn)
311 return nevermatcher(badfn)
312
312
313
313
314 def badmatch(match, badfn):
314 def badmatch(match, badfn):
315 """Make a copy of the given matcher, replacing its bad method with the given
315 """Make a copy of the given matcher, replacing its bad method with the given
316 one.
316 one.
317 """
317 """
318 m = copy.copy(match)
318 m = copy.copy(match)
319 m.bad = badfn
319 m.bad = badfn
320 return m
320 return m
321
321
322
322
323 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
323 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
324 '''Convert 'kind:pat' from the patterns list to tuples with kind and
324 '''Convert 'kind:pat' from the patterns list to tuples with kind and
325 normalized and rooted patterns and with listfiles expanded.'''
325 normalized and rooted patterns and with listfiles expanded.'''
326 kindpats = []
326 kindpats = []
327 for kind, pat in [_patsplit(p, default) for p in patterns]:
327 for kind, pat in [_patsplit(p, default) for p in patterns]:
328 if kind in cwdrelativepatternkinds:
328 if kind in cwdrelativepatternkinds:
329 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
329 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
330 elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
330 elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
331 pat = util.normpath(pat)
331 pat = util.normpath(pat)
332 elif kind in (b'listfile', b'listfile0'):
332 elif kind in (b'listfile', b'listfile0'):
333 try:
333 try:
334 files = util.readfile(pat)
334 files = util.readfile(pat)
335 if kind == b'listfile0':
335 if kind == b'listfile0':
336 files = files.split(b'\0')
336 files = files.split(b'\0')
337 else:
337 else:
338 files = files.splitlines()
338 files = files.splitlines()
339 files = [f for f in files if f]
339 files = [f for f in files if f]
340 except EnvironmentError:
340 except EnvironmentError:
341 raise error.Abort(_(b"unable to read file list (%s)") % pat)
341 raise error.Abort(_(b"unable to read file list (%s)") % pat)
342 for k, p, source in _donormalize(
342 for k, p, source in _donormalize(
343 files, default, root, cwd, auditor, warn
343 files, default, root, cwd, auditor, warn
344 ):
344 ):
345 kindpats.append((k, p, pat))
345 kindpats.append((k, p, pat))
346 continue
346 continue
347 elif kind == b'include':
347 elif kind == b'include':
348 try:
348 try:
349 fullpath = os.path.join(root, util.localpath(pat))
349 fullpath = os.path.join(root, util.localpath(pat))
350 includepats = readpatternfile(fullpath, warn)
350 includepats = readpatternfile(fullpath, warn)
351 for k, p, source in _donormalize(
351 for k, p, source in _donormalize(
352 includepats, default, root, cwd, auditor, warn
352 includepats, default, root, cwd, auditor, warn
353 ):
353 ):
354 kindpats.append((k, p, source or pat))
354 kindpats.append((k, p, source or pat))
355 except error.Abort as inst:
355 except error.Abort as inst:
356 raise error.Abort(
356 raise error.Abort(
357 b'%s: %s'
357 b'%s: %s'
358 % (pat, inst[0]) # pytype: disable=unsupported-operands
358 % (pat, inst[0]) # pytype: disable=unsupported-operands
359 )
359 )
360 except IOError as inst:
360 except IOError as inst:
361 if warn:
361 if warn:
362 warn(
362 warn(
363 _(b"skipping unreadable pattern file '%s': %s\n")
363 _(b"skipping unreadable pattern file '%s': %s\n")
364 % (pat, stringutil.forcebytestr(inst.strerror))
364 % (pat, stringutil.forcebytestr(inst.strerror))
365 )
365 )
366 continue
366 continue
367 # else: re or relre - which cannot be normalized
367 # else: re or relre - which cannot be normalized
368 kindpats.append((kind, pat, b''))
368 kindpats.append((kind, pat, b''))
369 return kindpats
369 return kindpats
370
370
371
371
372 class basematcher(object):
372 class basematcher(object):
373 def __init__(self, badfn=None):
373 def __init__(self, badfn=None):
374 if badfn is not None:
374 if badfn is not None:
375 self.bad = badfn
375 self.bad = badfn
376
376
377 def __call__(self, fn):
377 def __call__(self, fn):
378 return self.matchfn(fn)
378 return self.matchfn(fn)
379
379
380 # Callbacks related to how the matcher is used by dirstate.walk.
380 # Callbacks related to how the matcher is used by dirstate.walk.
381 # Subscribers to these events must monkeypatch the matcher object.
381 # Subscribers to these events must monkeypatch the matcher object.
382 def bad(self, f, msg):
382 def bad(self, f, msg):
383 '''Callback from dirstate.walk for each explicit file that can't be
383 '''Callback from dirstate.walk for each explicit file that can't be
384 found/accessed, with an error message.'''
384 found/accessed, with an error message.'''
385
385
386 # If a traversedir is set, it will be called when a directory discovered
386 # If a traversedir is set, it will be called when a directory discovered
387 # by recursive traversal is visited.
387 # by recursive traversal is visited.
388 traversedir = None
388 traversedir = None
389
389
390 @propertycache
390 @propertycache
391 def _files(self):
391 def _files(self):
392 return []
392 return []
393
393
394 def files(self):
394 def files(self):
395 '''Explicitly listed files or patterns or roots:
395 '''Explicitly listed files or patterns or roots:
396 if no patterns or .always(): empty list,
396 if no patterns or .always(): empty list,
397 if exact: list exact files,
397 if exact: list exact files,
398 if not .anypats(): list all files and dirs,
398 if not .anypats(): list all files and dirs,
399 else: optimal roots'''
399 else: optimal roots'''
400 return self._files
400 return self._files
401
401
402 @propertycache
402 @propertycache
403 def _fileset(self):
403 def _fileset(self):
404 return set(self._files)
404 return set(self._files)
405
405
406 def exact(self, f):
406 def exact(self, f):
407 '''Returns True if f is in .files().'''
407 '''Returns True if f is in .files().'''
408 return f in self._fileset
408 return f in self._fileset
409
409
410 def matchfn(self, f):
410 def matchfn(self, f):
411 return False
411 return False
412
412
413 def visitdir(self, dir):
413 def visitdir(self, dir):
414 '''Decides whether a directory should be visited based on whether it
414 '''Decides whether a directory should be visited based on whether it
415 has potential matches in it or one of its subdirectories. This is
415 has potential matches in it or one of its subdirectories. This is
416 based on the match's primary, included, and excluded patterns.
416 based on the match's primary, included, and excluded patterns.
417
417
418 Returns the string 'all' if the given directory and all subdirectories
418 Returns the string 'all' if the given directory and all subdirectories
419 should be visited. Otherwise returns True or False indicating whether
419 should be visited. Otherwise returns True or False indicating whether
420 the given directory should be visited.
420 the given directory should be visited.
421 '''
421 '''
422 return True
422 return True
423
423
424 def visitchildrenset(self, dir):
424 def visitchildrenset(self, dir):
425 '''Decides whether a directory should be visited based on whether it
425 '''Decides whether a directory should be visited based on whether it
426 has potential matches in it or one of its subdirectories, and
426 has potential matches in it or one of its subdirectories, and
427 potentially lists which subdirectories of that directory should be
427 potentially lists which subdirectories of that directory should be
428 visited. This is based on the match's primary, included, and excluded
428 visited. This is based on the match's primary, included, and excluded
429 patterns.
429 patterns.
430
430
431 This function is very similar to 'visitdir', and the following mapping
431 This function is very similar to 'visitdir', and the following mapping
432 can be applied:
432 can be applied:
433
433
434 visitdir | visitchildrenset
434 visitdir | visitchildrenset
435 ----------+-------------------
435 ----------+-------------------
436 False | set()
436 False | set()
437 'all' | 'all'
437 'all' | 'all'
438 True | 'this' OR non-empty set of subdirs -or files- to visit
438 True | 'this' OR non-empty set of subdirs -or files- to visit
439
439
440 Example:
440 Example:
441 Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
441 Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
442 the following values (assuming the implementation of visitchildrenset
442 the following values (assuming the implementation of visitchildrenset
443 is capable of recognizing this; some implementations are not).
443 is capable of recognizing this; some implementations are not).
444
444
445 '' -> {'foo', 'qux'}
445 '' -> {'foo', 'qux'}
446 'baz' -> set()
446 'baz' -> set()
447 'foo' -> {'bar'}
447 'foo' -> {'bar'}
448 # Ideally this would be 'all', but since the prefix nature of matchers
448 # Ideally this would be 'all', but since the prefix nature of matchers
449 # is applied to the entire matcher, we have to downgrade this to
449 # is applied to the entire matcher, we have to downgrade this to
450 # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
450 # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
451 # in.
451 # in.
452 'foo/bar' -> 'this'
452 'foo/bar' -> 'this'
453 'qux' -> 'this'
453 'qux' -> 'this'
454
454
455 Important:
455 Important:
456 Most matchers do not know if they're representing files or
456 Most matchers do not know if they're representing files or
457 directories. They see ['path:dir/f'] and don't know whether 'f' is a
457 directories. They see ['path:dir/f'] and don't know whether 'f' is a
458 file or a directory, so visitchildrenset('dir') for most matchers will
458 file or a directory, so visitchildrenset('dir') for most matchers will
459 return {'f'}, but if the matcher knows it's a file (like exactmatcher
459 return {'f'}, but if the matcher knows it's a file (like exactmatcher
460 does), it may return 'this'. Do not rely on the return being a set
460 does), it may return 'this'. Do not rely on the return being a set
461 indicating that there are no files in this dir to investigate (or
461 indicating that there are no files in this dir to investigate (or
462 equivalently that if there are files to investigate in 'dir' that it
462 equivalently that if there are files to investigate in 'dir' that it
463 will always return 'this').
463 will always return 'this').
464 '''
464 '''
465 return b'this'
465 return b'this'
466
466
467 def always(self):
467 def always(self):
468 '''Matcher will match everything and .files() will be empty --
468 '''Matcher will match everything and .files() will be empty --
469 optimization might be possible.'''
469 optimization might be possible.'''
470 return False
470 return False
471
471
472 def isexact(self):
472 def isexact(self):
473 '''Matcher will match exactly the list of files in .files() --
473 '''Matcher will match exactly the list of files in .files() --
474 optimization might be possible.'''
474 optimization might be possible.'''
475 return False
475 return False
476
476
477 def prefix(self):
477 def prefix(self):
478 '''Matcher will match the paths in .files() recursively --
478 '''Matcher will match the paths in .files() recursively --
479 optimization might be possible.'''
479 optimization might be possible.'''
480 return False
480 return False
481
481
482 def anypats(self):
482 def anypats(self):
483 '''None of .always(), .isexact(), and .prefix() is true --
483 '''None of .always(), .isexact(), and .prefix() is true --
484 optimizations will be difficult.'''
484 optimizations will be difficult.'''
485 return not self.always() and not self.isexact() and not self.prefix()
485 return not self.always() and not self.isexact() and not self.prefix()
486
486
487
487
488 class alwaysmatcher(basematcher):
488 class alwaysmatcher(basematcher):
489 '''Matches everything.'''
489 '''Matches everything.'''
490
490
491 def __init__(self, badfn=None):
491 def __init__(self, badfn=None):
492 super(alwaysmatcher, self).__init__(badfn)
492 super(alwaysmatcher, self).__init__(badfn)
493
493
494 def always(self):
494 def always(self):
495 return True
495 return True
496
496
497 def matchfn(self, f):
497 def matchfn(self, f):
498 return True
498 return True
499
499
500 def visitdir(self, dir):
500 def visitdir(self, dir):
501 return b'all'
501 return b'all'
502
502
503 def visitchildrenset(self, dir):
503 def visitchildrenset(self, dir):
504 return b'all'
504 return b'all'
505
505
506 def __repr__(self):
506 def __repr__(self):
507 return r'<alwaysmatcher>'
507 return r'<alwaysmatcher>'
508
508
509
509
class nevermatcher(basematcher):
    '''Matcher for which no file and no directory is a match.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # Claiming to be both an exact matcher and a prefix matcher looks odd for
    # a matcher that matches nothing, but it lets callers pick their fast
    # paths. Those fast paths iterate over the exact matches / prefixes, and
    # since there are none (files() returns []), they finish immediately and
    # still produce the correct (empty) result.
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # No directory is ever visited.
        return False

    def visitchildrenset(self, dir):
        # No child of any directory is ever visited.
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
535
535
536
536
class predicatematcher(basematcher):
    """Wrap a plain boolean function so it can be used as a matcher.

    ``predfn`` is installed as ``matchfn``. ``predrepr`` is an optional
    description that ``__repr__`` uses in preference to the repr of the
    function itself.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            # No usable description was supplied: show the function instead.
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): 'predicatenmatcher' looks like a typo, but the string
        # is kept as-is because existing output may be relied upon.
        return b'<predicatenmatcher pred=%s>' % described
551
551
552
552
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        pats, matchfn = _buildmatch(kindpats, b'$', root)
        self._pats = pats
        self.matchfn = matchfn

    @propertycache
    def _dirs(self):
        # Directories derived from the explicit file set via pathutil.dirs.
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        """Return b'all', True or False for directory ``dir``.

        b'all' is returned only when this is a prefix matcher and ``dir`` is
        itself one of the explicit files; otherwise the result is a boolean.
        """
        if dir in self._fileset:
            return b'all' if self._prefix else True
        if dir in self._dirs:
            return True
        # Finally, accept dir when one of the directories produced by
        # pathutil.finddirs(dir) was listed explicitly.
        return any(
            ancestor in self._fileset for ancestor in pathutil.finddirs(dir)
        )

    def visitchildrenset(self, dir):
        """Translate the visitdir() verdict into a visitchildrenset value."""
        verdict = self.visitdir(dir)
        if verdict is True:
            return b'this'
        if not verdict:
            return set()
        assert verdict == b'all'
        return b'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
624
624
625
625
626 # This is basically a reimplementation of pathutil.dirs that stores the
626 # This is basically a reimplementation of pathutil.dirs that stores the
627 # children instead of just a count of them, plus a small optional optimization
627 # children instead of just a count of them, plus a small optional optimization
628 # to avoid some directories we don't need.
628 # to avoid some directories we don't need.
629 class _dirchildren(object):
629 class _dirchildren(object):
630 def __init__(self, paths, onlyinclude=None):
630 def __init__(self, paths, onlyinclude=None):
631 self._dirs = {}
631 self._dirs = {}
632 self._onlyinclude = onlyinclude or []
632 self._onlyinclude = onlyinclude or []
633 addpath = self.addpath
633 addpath = self.addpath
634 for f in paths:
634 for f in paths:
635 addpath(f)
635 addpath(f)
636
636
637 def addpath(self, path):
637 def addpath(self, path):
638 if path == b'':
638 if path == b'':
639 return
639 return
640 dirs = self._dirs
640 dirs = self._dirs
641 findsplitdirs = _dirchildren._findsplitdirs
641 findsplitdirs = _dirchildren._findsplitdirs
642 for d, b in findsplitdirs(path):
642 for d, b in findsplitdirs(path):
643 if d not in self._onlyinclude:
643 if d not in self._onlyinclude:
644 continue
644 continue
645 dirs.setdefault(d, set()).add(b)
645 dirs.setdefault(d, set()).add(b)
646
646
647 @staticmethod
647 @staticmethod
648 def _findsplitdirs(path):
648 def _findsplitdirs(path):
649 # yields (dirname, basename) tuples, walking back to the root. This is
649 # yields (dirname, basename) tuples, walking back to the root. This is
650 # very similar to pathutil.finddirs, except:
650 # very similar to pathutil.finddirs, except:
651 # - produces a (dirname, basename) tuple, not just 'dirname'
651 # - produces a (dirname, basename) tuple, not just 'dirname'
652 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
652 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
653 # slash.
653 # slash.
654 oldpos = len(path)
654 oldpos = len(path)
655 pos = path.rfind(b'/')
655 pos = path.rfind(b'/')
656 while pos != -1:
656 while pos != -1:
657 yield path[:pos], path[pos + 1 : oldpos]
657 yield path[:pos], path[pos + 1 : oldpos]
658 oldpos = pos
658 oldpos = pos
659 pos = path.rfind(b'/', 0, pos)
659 pos = path.rfind(b'/', 0, pos)
660 yield b'', path[:oldpos]
660 yield b'', path[:oldpos]
661
661
662 def get(self, path):
662 def get(self, path):
663 return self._dirs.get(path, set())
663 return self._dirs.get(path, set())
664
664
665
665
class includematcher(basematcher):
    """Matcher built from include-style (kind, pat, source) patterns.

    The patterns are compiled with the b'(?:/|$)' suffix, and the directory
    sets used by visitdir()/visitchildrenset() are taken from
    _rootsdirsandparents(kindpats).
    """

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        pats, matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._pats = pats
        self.matchfn = matchfn
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        """Return b'all', True or False for directory ``dir``.

        b'all' is only returned for a root directory of a prefix matcher.
        """
        if dir in self._roots:
            return b'all' if self._prefix else True
        if dir in self._dirs or dir in self._parents:
            return True
        # Anything below a recursively included root must be visited too.
        return any(
            ancestor in self._roots for ancestor in pathutil.finddirs(dir)
        )

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        """Return b'all', b'this' or a set of child names for ``dir``."""
        roots = self._roots
        if self._prefix and dir in roots:
            return b'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if b'' in roots or dir in roots or dir in self._dirs:
            return b'this'
        if any(ancestor in roots for ancestor in pathutil.finddirs(dir)):
            return b'this'

        if dir not in self._parents:
            return set()
        # dir is only needed as a stepping stone: name the children to visit.
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
728
728
729
729
class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # A list is stored as given (no copy); any other iterable is
        # materialized into a list.
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # Directories derived from the listed files via pathutil.dirs.
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        """Return the names directly under ``dir`` that need visiting."""
        if not self._fileset or dir not in self._dirs:
            return set()

        names = self._fileset | (self._dirs - {b''})
        if dir != b'':
            # Keep only entries below dir, expressed relative to it.
            lead = dir + b'/'
            skip = len(lead)
            names = set(n[skip:] for n in names if n.startswith(lead))
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        children = {n for n in names if b'/' not in n}
        # We really do not expect the result to be empty, since that would
        # imply that there's something in _dirs that didn't have a file in
        # _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
793
793
794
794
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    '''

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        # Only the first matcher's callbacks are honoured.
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        """Return a true value when m1 matches ``f`` and m2 does not."""
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        """Return b'all', True or False for directory ``dir``.

        m2's answer is computed once and reused instead of querying m2 twice
        for the same directory.
        """
        m2_visit = self._m2.visitdir(dir)
        if m2_visit == b'all':
            # Everything under dir is excluded by m2.
            return False
        m1_visit = self._m1.visitdir(dir)
        if not m2_visit:
            # m2 does not match dir, we can return 'all' here if possible
            return m1_visit
        # m2 matches part of dir, so never promise 'all'.
        return bool(m1_visit)

    def visitchildrenset(self, dir):
        """Return b'all', b'this' or a set of child names for ``dir``."""
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
862
862
863
863
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.

    If either argument is None, ``m1 or m2`` is returned. If one matcher
    always matches, a shallow copy of the other one is returned (carrying
    m1's ``bad`` and ``traversedir``); otherwise an intersectionmatcher is
    built.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        # m1 adds no restriction: use a copy of m2, but keep m1's callbacks.
        combined = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        combined.bad = m1.bad
        combined.traversedir = m1.traversedir
        return combined

    if m2.always():
        # m2 adds no restriction: a copy of m1 already has the right callbacks.
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
882
882
883
883
class intersectionmatcher(basematcher):
    """Matcher that matches only what both ``m1`` and ``m2`` match.

    ``bad`` and ``traversedir`` are taken from ``m1``.
    """

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not always
            # files. For example, if intersecting a matcher "-I glob:foo.txt"
            # with matcher of "path:dir2", we don't want to remove "dir2" from
            # the set.
            return self._m1.files() + self._m2.files()
        # At least one side is exact: filter its file list through the other.
        exact, other = self._m1, self._m2
        if not exact.isexact():
            exact, other = other, exact
        return [f for f in exact.files() if other(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        """Return b'all', True or False for directory ``dir``."""
        first = self._m1.visitdir(dir)
        if first == b'all':
            # m1 puts no restriction below dir, so m2's answer is the answer.
            return self._m2.visitdir(dir)
        if not first:
            return False
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        """Return b'all', b'this' or a set of child names for ``dir``."""
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side means the other side alone decides.
        if m1_set == b'all':
            return m2_set
        if m2_set == b'all':
            return m1_set

        if b'this' in (m1_set, m2_set):
            return b'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set.intersection(m2_set)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
943
943
944
944
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files that live below `path`, with
        # the "path/" prefix stripped off.
        lead = path + b"/"
        skip = len(path) + 1
        self._files = [
            name[skip:] for name in matcher._files if name.startswith(lead)
        ]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(name == path for name in matcher._files)

    def bad(self, f, msg):
        # Report through the wrapped matcher using the full path.
        fullname = self._path + b"/" + f
        self._matcher.bad(fullname, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        fullname = self._path + b"/" + f
        return self._matcher.matchfn(fullname)

    def visitdir(self, dir):
        # b'' denotes the subdirectory itself in this matcher's namespace.
        target = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitdir(target)

    def visitchildrenset(self, dir):
        # Same remapping as visitdir().
        target = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitchildrenset(target)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return b'<subdirmatcher path=%r, matcher=%r>' % (
            self._path,
            self._matcher,
        )
1025
1025
1026
1026
1027 class prefixdirmatcher(basematcher):
1027 class prefixdirmatcher(basematcher):
1028 """Adapt a matcher to work on a parent directory.
1028 """Adapt a matcher to work on a parent directory.
1029
1029
1030 The matcher's non-matching-attributes (bad, traversedir) are ignored.
1030 The matcher's non-matching-attributes (bad, traversedir) are ignored.
1031
1031
1032 The prefix path should usually be the relative path from the root of
1032 The prefix path should usually be the relative path from the root of
1033 this matcher to the root of the wrapped matcher.
1033 this matcher to the root of the wrapped matcher.
1034
1034
1035 >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
1035 >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
1036 >>> m2 = prefixdirmatcher(b'd/e', m1)
1036 >>> m2 = prefixdirmatcher(b'd/e', m1)
1037 >>> m2(b'a.txt')
1037 >>> m2(b'a.txt')
1038 False
1038 False
1039 >>> m2(b'd/e/a.txt')
1039 >>> m2(b'd/e/a.txt')
1040 True
1040 True
1041 >>> m2(b'd/e/b.txt')
1041 >>> m2(b'd/e/b.txt')
1042 False
1042 False
1043 >>> m2.files()
1043 >>> m2.files()
1044 ['d/e/a.txt', 'd/e/f/b.txt']
1044 ['d/e/a.txt', 'd/e/f/b.txt']
1045 >>> m2.exact(b'd/e/a.txt')
1045 >>> m2.exact(b'd/e/a.txt')
1046 True
1046 True
1047 >>> m2.visitdir(b'd')
1047 >>> m2.visitdir(b'd')
1048 True
1048 True
1049 >>> m2.visitdir(b'd/e')
1049 >>> m2.visitdir(b'd/e')
1050 True
1050 True
1051 >>> m2.visitdir(b'd/e/f')
1051 >>> m2.visitdir(b'd/e/f')
1052 True
1052 True
1053 >>> m2.visitdir(b'd/e/g')
1053 >>> m2.visitdir(b'd/e/g')
1054 False
1054 False
1055 >>> m2.visitdir(b'd/ef')
1055 >>> m2.visitdir(b'd/ef')
1056 False
1056 False
1057 """
1057 """
1058
1058
1059 def __init__(self, path, matcher, badfn=None):
1059 def __init__(self, path, matcher, badfn=None):
1060 super(prefixdirmatcher, self).__init__(badfn)
1060 super(prefixdirmatcher, self).__init__(badfn)
1061 if not path:
1061 if not path:
1062 raise error.ProgrammingError(b'prefix path must not be empty')
1062 raise error.ProgrammingError(b'prefix path must not be empty')
1063 self._path = path
1063 self._path = path
1064 self._pathprefix = path + b'/'
1064 self._pathprefix = path + b'/'
1065 self._matcher = matcher
1065 self._matcher = matcher
1066
1066
1067 @propertycache
1067 @propertycache
1068 def _files(self):
1068 def _files(self):
1069 return [self._pathprefix + f for f in self._matcher._files]
1069 return [self._pathprefix + f for f in self._matcher._files]
1070
1070
1071 def matchfn(self, f):
1071 def matchfn(self, f):
1072 if not f.startswith(self._pathprefix):
1072 if not f.startswith(self._pathprefix):
1073 return False
1073 return False
1074 return self._matcher.matchfn(f[len(self._pathprefix) :])
1074 return self._matcher.matchfn(f[len(self._pathprefix) :])
1075
1075
1076 @propertycache
1076 @propertycache
1077 def _pathdirs(self):
1077 def _pathdirs(self):
1078 return set(pathutil.finddirs(self._path))
1078 return set(pathutil.finddirs(self._path))
1079
1079
1080 def visitdir(self, dir):
1080 def visitdir(self, dir):
1081 if dir == self._path:
1081 if dir == self._path:
1082 return self._matcher.visitdir(b'')
1082 return self._matcher.visitdir(b'')
1083 if dir.startswith(self._pathprefix):
1083 if dir.startswith(self._pathprefix):
1084 return self._matcher.visitdir(dir[len(self._pathprefix) :])
1084 return self._matcher.visitdir(dir[len(self._pathprefix) :])
1085 return dir in self._pathdirs
1085 return dir in self._pathdirs
1086
1086
1087 def visitchildrenset(self, dir):
1087 def visitchildrenset(self, dir):
1088 if dir == self._path:
1088 if dir == self._path:
1089 return self._matcher.visitchildrenset(b'')
1089 return self._matcher.visitchildrenset(b'')
1090 if dir.startswith(self._pathprefix):
1090 if dir.startswith(self._pathprefix):
1091 return self._matcher.visitchildrenset(dir[len(self._pathprefix) :])
1091 return self._matcher.visitchildrenset(dir[len(self._pathprefix) :])
1092 if dir in self._pathdirs:
1092 if dir in self._pathdirs:
1093 return b'this'
1093 return b'this'
1094 return set()
1094 return set()
1095
1095
1096 def isexact(self):
1096 def isexact(self):
1097 return self._matcher.isexact()
1097 return self._matcher.isexact()
1098
1098
1099 def prefix(self):
1099 def prefix(self):
1100 return self._matcher.prefix()
1100 return self._matcher.prefix()
1101
1101
1102 @encoding.strmethod
1102 @encoding.strmethod
1103 def __repr__(self):
1103 def __repr__(self):
1104 return b'<prefixdirmatcher path=%r, matcher=%r>' % (
1104 return b'<prefixdirmatcher path=%r, matcher=%r>' % (
1105 pycompat.bytestr(self._path),
1105 pycompat.bytestr(self._path),
1106 self._matcher,
1106 self._matcher,
1107 )
1107 )
1108
1108
1109
1109
1110 class unionmatcher(basematcher):
1110 class unionmatcher(basematcher):
1111 """A matcher that is the union of several matchers.
1111 """A matcher that is the union of several matchers.
1112
1112
1113 The non-matching-attributes (bad, traversedir) are taken from the first
1113 The non-matching-attributes (bad, traversedir) are taken from the first
1114 matcher.
1114 matcher.
1115 """
1115 """
1116
1116
1117 def __init__(self, matchers):
1117 def __init__(self, matchers):
1118 m1 = matchers[0]
1118 m1 = matchers[0]
1119 super(unionmatcher, self).__init__()
1119 super(unionmatcher, self).__init__()
1120 self.traversedir = m1.traversedir
1120 self.traversedir = m1.traversedir
1121 self._matchers = matchers
1121 self._matchers = matchers
1122
1122
1123 def matchfn(self, f):
1123 def matchfn(self, f):
1124 for match in self._matchers:
1124 for match in self._matchers:
1125 if match(f):
1125 if match(f):
1126 return True
1126 return True
1127 return False
1127 return False
1128
1128
1129 def visitdir(self, dir):
1129 def visitdir(self, dir):
1130 r = False
1130 r = False
1131 for m in self._matchers:
1131 for m in self._matchers:
1132 v = m.visitdir(dir)
1132 v = m.visitdir(dir)
1133 if v == b'all':
1133 if v == b'all':
1134 return v
1134 return v
1135 r |= v
1135 r |= v
1136 return r
1136 return r
1137
1137
1138 def visitchildrenset(self, dir):
1138 def visitchildrenset(self, dir):
1139 r = set()
1139 r = set()
1140 this = False
1140 this = False
1141 for m in self._matchers:
1141 for m in self._matchers:
1142 v = m.visitchildrenset(dir)
1142 v = m.visitchildrenset(dir)
1143 if not v:
1143 if not v:
1144 continue
1144 continue
1145 if v == b'all':
1145 if v == b'all':
1146 return v
1146 return v
1147 if this or v == b'this':
1147 if this or v == b'this':
1148 this = True
1148 this = True
1149 # don't break, we might have an 'all' in here.
1149 # don't break, we might have an 'all' in here.
1150 continue
1150 continue
1151 assert isinstance(v, set)
1151 assert isinstance(v, set)
1152 r = r.union(v)
1152 r = r.union(v)
1153 if this:
1153 if this:
1154 return b'this'
1154 return b'this'
1155 return r
1155 return r
1156
1156
1157 @encoding.strmethod
1157 @encoding.strmethod
1158 def __repr__(self):
1158 def __repr__(self):
1159 return b'<unionmatcher matchers=%r>' % self._matchers
1159 return b'<unionmatcher matchers=%r>' % self._matchers
1160
1160
1161
1161
1162 def patkind(pattern, default=None):
1162 def patkind(pattern, default=None):
1163 r'''If pattern is 'kind:pat' with a known kind, return kind.
1163 r'''If pattern is 'kind:pat' with a known kind, return kind.
1164
1164
1165 >>> patkind(br're:.*\.c$')
1165 >>> patkind(br're:.*\.c$')
1166 're'
1166 're'
1167 >>> patkind(b'glob:*.c')
1167 >>> patkind(b'glob:*.c')
1168 'glob'
1168 'glob'
1169 >>> patkind(b'relpath:test.py')
1169 >>> patkind(b'relpath:test.py')
1170 'relpath'
1170 'relpath'
1171 >>> patkind(b'main.py')
1171 >>> patkind(b'main.py')
1172 >>> patkind(b'main.py', default=b're')
1172 >>> patkind(b'main.py', default=b're')
1173 're'
1173 're'
1174 '''
1174 '''
1175 return _patsplit(pattern, default)[0]
1175 return _patsplit(pattern, default)[0]
1176
1176
1177
1177
1178 def _patsplit(pattern, default):
1178 def _patsplit(pattern, default):
1179 """Split a string into the optional pattern kind prefix and the actual
1179 """Split a string into the optional pattern kind prefix and the actual
1180 pattern."""
1180 pattern."""
1181 if b':' in pattern:
1181 if b':' in pattern:
1182 kind, pat = pattern.split(b':', 1)
1182 kind, pat = pattern.split(b':', 1)
1183 if kind in allpatternkinds:
1183 if kind in allpatternkinds:
1184 return kind, pat
1184 return kind, pat
1185 return default, pattern
1185 return default, pattern
1186
1186
1187
1187
1188 def _globre(pat):
1188 def _globre(pat):
1189 r'''Convert an extended glob string to a regexp string.
1189 r'''Convert an extended glob string to a regexp string.
1190
1190
1191 >>> from . import pycompat
1191 >>> from . import pycompat
1192 >>> def bprint(s):
1192 >>> def bprint(s):
1193 ... print(pycompat.sysstr(s))
1193 ... print(pycompat.sysstr(s))
1194 >>> bprint(_globre(br'?'))
1194 >>> bprint(_globre(br'?'))
1195 .
1195 .
1196 >>> bprint(_globre(br'*'))
1196 >>> bprint(_globre(br'*'))
1197 [^/]*
1197 [^/]*
1198 >>> bprint(_globre(br'**'))
1198 >>> bprint(_globre(br'**'))
1199 .*
1199 .*
1200 >>> bprint(_globre(br'**/a'))
1200 >>> bprint(_globre(br'**/a'))
1201 (?:.*/)?a
1201 (?:.*/)?a
1202 >>> bprint(_globre(br'a/**/b'))
1202 >>> bprint(_globre(br'a/**/b'))
1203 a/(?:.*/)?b
1203 a/(?:.*/)?b
1204 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
1204 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
1205 [a*?!^][\^b][^c]
1205 [a*?!^][\^b][^c]
1206 >>> bprint(_globre(br'{a,b}'))
1206 >>> bprint(_globre(br'{a,b}'))
1207 (?:a|b)
1207 (?:a|b)
1208 >>> bprint(_globre(br'.\*\?'))
1208 >>> bprint(_globre(br'.\*\?'))
1209 \.\*\?
1209 \.\*\?
1210 '''
1210 '''
1211 i, n = 0, len(pat)
1211 i, n = 0, len(pat)
1212 res = b''
1212 res = b''
1213 group = 0
1213 group = 0
1214 escape = util.stringutil.regexbytesescapemap.get
1214 escape = util.stringutil.regexbytesescapemap.get
1215
1215
1216 def peek():
1216 def peek():
1217 return i < n and pat[i : i + 1]
1217 return i < n and pat[i : i + 1]
1218
1218
1219 while i < n:
1219 while i < n:
1220 c = pat[i : i + 1]
1220 c = pat[i : i + 1]
1221 i += 1
1221 i += 1
1222 if c not in b'*?[{},\\':
1222 if c not in b'*?[{},\\':
1223 res += escape(c, c)
1223 res += escape(c, c)
1224 elif c == b'*':
1224 elif c == b'*':
1225 if peek() == b'*':
1225 if peek() == b'*':
1226 i += 1
1226 i += 1
1227 if peek() == b'/':
1227 if peek() == b'/':
1228 i += 1
1228 i += 1
1229 res += b'(?:.*/)?'
1229 res += b'(?:.*/)?'
1230 else:
1230 else:
1231 res += b'.*'
1231 res += b'.*'
1232 else:
1232 else:
1233 res += b'[^/]*'
1233 res += b'[^/]*'
1234 elif c == b'?':
1234 elif c == b'?':
1235 res += b'.'
1235 res += b'.'
1236 elif c == b'[':
1236 elif c == b'[':
1237 j = i
1237 j = i
1238 if j < n and pat[j : j + 1] in b'!]':
1238 if j < n and pat[j : j + 1] in b'!]':
1239 j += 1
1239 j += 1
1240 while j < n and pat[j : j + 1] != b']':
1240 while j < n and pat[j : j + 1] != b']':
1241 j += 1
1241 j += 1
1242 if j >= n:
1242 if j >= n:
1243 res += b'\\['
1243 res += b'\\['
1244 else:
1244 else:
1245 stuff = pat[i:j].replace(b'\\', b'\\\\')
1245 stuff = pat[i:j].replace(b'\\', b'\\\\')
1246 i = j + 1
1246 i = j + 1
1247 if stuff[0:1] == b'!':
1247 if stuff[0:1] == b'!':
1248 stuff = b'^' + stuff[1:]
1248 stuff = b'^' + stuff[1:]
1249 elif stuff[0:1] == b'^':
1249 elif stuff[0:1] == b'^':
1250 stuff = b'\\' + stuff
1250 stuff = b'\\' + stuff
1251 res = b'%s[%s]' % (res, stuff)
1251 res = b'%s[%s]' % (res, stuff)
1252 elif c == b'{':
1252 elif c == b'{':
1253 group += 1
1253 group += 1
1254 res += b'(?:'
1254 res += b'(?:'
1255 elif c == b'}' and group:
1255 elif c == b'}' and group:
1256 res += b')'
1256 res += b')'
1257 group -= 1
1257 group -= 1
1258 elif c == b',' and group:
1258 elif c == b',' and group:
1259 res += b'|'
1259 res += b'|'
1260 elif c == b'\\':
1260 elif c == b'\\':
1261 p = peek()
1261 p = peek()
1262 if p:
1262 if p:
1263 i += 1
1263 i += 1
1264 res += escape(p, p)
1264 res += escape(p, p)
1265 else:
1265 else:
1266 res += escape(c, c)
1266 res += escape(c, c)
1267 else:
1267 else:
1268 res += escape(c, c)
1268 res += escape(c, c)
1269 return res
1269 return res
1270
1270
1271
1271
1272 def _regex(kind, pat, globsuffix):
1272 def _regex(kind, pat, globsuffix):
1273 '''Convert a (normalized) pattern of any kind into a
1273 '''Convert a (normalized) pattern of any kind into a
1274 regular expression.
1274 regular expression.
1275 globsuffix is appended to the regexp of globs.'''
1275 globsuffix is appended to the regexp of globs.'''
1276
1277 if rustmod is not None:
1278 try:
1279 return rustmod.build_single_regex(kind, pat, globsuffix)
1280 except rustmod.PatternError:
1281 raise error.ProgrammingError(
1282 b'not a regex pattern: %s:%s' % (kind, pat)
1283 )
1284
1285 if not pat and kind in (b'glob', b'relpath'):
1276 if not pat and kind in (b'glob', b'relpath'):
1286 return b''
1277 return b''
1287 if kind == b're':
1278 if kind == b're':
1288 return pat
1279 return pat
1289 if kind in (b'path', b'relpath'):
1280 if kind in (b'path', b'relpath'):
1290 if pat == b'.':
1281 if pat == b'.':
1291 return b''
1282 return b''
1292 return util.stringutil.reescape(pat) + b'(?:/|$)'
1283 return util.stringutil.reescape(pat) + b'(?:/|$)'
1293 if kind == b'rootfilesin':
1284 if kind == b'rootfilesin':
1294 if pat == b'.':
1285 if pat == b'.':
1295 escaped = b''
1286 escaped = b''
1296 else:
1287 else:
1297 # Pattern is a directory name.
1288 # Pattern is a directory name.
1298 escaped = util.stringutil.reescape(pat) + b'/'
1289 escaped = util.stringutil.reescape(pat) + b'/'
1299 # Anything after the pattern must be a non-directory.
1290 # Anything after the pattern must be a non-directory.
1300 return escaped + b'[^/]+$'
1291 return escaped + b'[^/]+$'
1301 if kind == b'relglob':
1292 if kind == b'relglob':
1302 globre = _globre(pat)
1293 globre = _globre(pat)
1303 if globre.startswith(b'[^/]*'):
1294 if globre.startswith(b'[^/]*'):
1304 # When pat has the form *XYZ (common), make the returned regex more
1295 # When pat has the form *XYZ (common), make the returned regex more
1305 # legible by returning the regex for **XYZ instead of **/*XYZ.
1296 # legible by returning the regex for **XYZ instead of **/*XYZ.
1306 return b'.*' + globre[len(b'[^/]*') :] + globsuffix
1297 return b'.*' + globre[len(b'[^/]*') :] + globsuffix
1307 return b'(?:|.*/)' + globre + globsuffix
1298 return b'(?:|.*/)' + globre + globsuffix
1308 if kind == b'relre':
1299 if kind == b'relre':
1309 if pat.startswith(b'^'):
1300 if pat.startswith(b'^'):
1310 return pat
1301 return pat
1311 return b'.*' + pat
1302 return b'.*' + pat
1312 if kind in (b'glob', b'rootglob'):
1303 if kind in (b'glob', b'rootglob'):
1313 return _globre(pat) + globsuffix
1304 return _globre(pat) + globsuffix
1314 raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1305 raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
1315
1306
1316
1307
1317 def _buildmatch(kindpats, globsuffix, root):
1308 def _buildmatch(kindpats, globsuffix, root):
1318 '''Return regexp string and a matcher function for kindpats.
1309 '''Return regexp string and a matcher function for kindpats.
1319 globsuffix is appended to the regexp of globs.'''
1310 globsuffix is appended to the regexp of globs.'''
1320 matchfuncs = []
1311 matchfuncs = []
1321
1312
1322 subincludes, kindpats = _expandsubinclude(kindpats, root)
1313 subincludes, kindpats = _expandsubinclude(kindpats, root)
1323 if subincludes:
1314 if subincludes:
1324 submatchers = {}
1315 submatchers = {}
1325
1316
1326 def matchsubinclude(f):
1317 def matchsubinclude(f):
1327 for prefix, matcherargs in subincludes:
1318 for prefix, matcherargs in subincludes:
1328 if f.startswith(prefix):
1319 if f.startswith(prefix):
1329 mf = submatchers.get(prefix)
1320 mf = submatchers.get(prefix)
1330 if mf is None:
1321 if mf is None:
1331 mf = match(*matcherargs)
1322 mf = match(*matcherargs)
1332 submatchers[prefix] = mf
1323 submatchers[prefix] = mf
1333
1324
1334 if mf(f[len(prefix) :]):
1325 if mf(f[len(prefix) :]):
1335 return True
1326 return True
1336 return False
1327 return False
1337
1328
1338 matchfuncs.append(matchsubinclude)
1329 matchfuncs.append(matchsubinclude)
1339
1330
1340 regex = b''
1331 regex = b''
1341 if kindpats:
1332 if kindpats:
1342 if all(k == b'rootfilesin' for k, p, s in kindpats):
1333 if all(k == b'rootfilesin' for k, p, s in kindpats):
1343 dirs = {p for k, p, s in kindpats}
1334 dirs = {p for k, p, s in kindpats}
1344
1335
1345 def mf(f):
1336 def mf(f):
1346 i = f.rfind(b'/')
1337 i = f.rfind(b'/')
1347 if i >= 0:
1338 if i >= 0:
1348 dir = f[:i]
1339 dir = f[:i]
1349 else:
1340 else:
1350 dir = b'.'
1341 dir = b'.'
1351 return dir in dirs
1342 return dir in dirs
1352
1343
1353 regex = b'rootfilesin: %s' % stringutil.pprint(list(sorted(dirs)))
1344 regex = b'rootfilesin: %s' % stringutil.pprint(list(sorted(dirs)))
1354 matchfuncs.append(mf)
1345 matchfuncs.append(mf)
1355 else:
1346 else:
1356 regex, mf = _buildregexmatch(kindpats, globsuffix)
1347 regex, mf = _buildregexmatch(kindpats, globsuffix)
1357 matchfuncs.append(mf)
1348 matchfuncs.append(mf)
1358
1349
1359 if len(matchfuncs) == 1:
1350 if len(matchfuncs) == 1:
1360 return regex, matchfuncs[0]
1351 return regex, matchfuncs[0]
1361 else:
1352 else:
1362 return regex, lambda f: any(mf(f) for mf in matchfuncs)
1353 return regex, lambda f: any(mf(f) for mf in matchfuncs)
1363
1354
1364
1355
1365 MAX_RE_SIZE = 20000
1356 MAX_RE_SIZE = 20000
1366
1357
1367
1358
1368 def _joinregexes(regexps):
1359 def _joinregexes(regexps):
1369 """gather multiple regular expressions into a single one"""
1360 """gather multiple regular expressions into a single one"""
1370 return b'|'.join(regexps)
1361 return b'|'.join(regexps)
1371
1362
1372
1363
1373 def _buildregexmatch(kindpats, globsuffix):
1364 def _buildregexmatch(kindpats, globsuffix):
1374 """Build a match function from a list of kinds and kindpats,
1365 """Build a match function from a list of kinds and kindpats,
1375 return regexp string and a matcher function.
1366 return regexp string and a matcher function.
1376
1367
1377 Test too large input
1368 Test too large input
1378 >>> _buildregexmatch([
1369 >>> _buildregexmatch([
1379 ... (b'relglob', b'?' * MAX_RE_SIZE, b'')
1370 ... (b'relglob', b'?' * MAX_RE_SIZE, b'')
1380 ... ], b'$')
1371 ... ], b'$')
1381 Traceback (most recent call last):
1372 Traceback (most recent call last):
1382 ...
1373 ...
1383 Abort: matcher pattern is too long (20009 bytes)
1374 Abort: matcher pattern is too long (20009 bytes)
1384 """
1375 """
1385 try:
1376 try:
1386 allgroups = []
1377 allgroups = []
1387 regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
1378 regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
1388 fullregexp = _joinregexes(regexps)
1379 fullregexp = _joinregexes(regexps)
1389
1380
1390 startidx = 0
1381 startidx = 0
1391 groupsize = 0
1382 groupsize = 0
1392 for idx, r in enumerate(regexps):
1383 for idx, r in enumerate(regexps):
1393 piecesize = len(r)
1384 piecesize = len(r)
1394 if piecesize > MAX_RE_SIZE:
1385 if piecesize > MAX_RE_SIZE:
1395 msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
1386 msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
1396 raise error.Abort(msg)
1387 raise error.Abort(msg)
1397 elif (groupsize + piecesize) > MAX_RE_SIZE:
1388 elif (groupsize + piecesize) > MAX_RE_SIZE:
1398 group = regexps[startidx:idx]
1389 group = regexps[startidx:idx]
1399 allgroups.append(_joinregexes(group))
1390 allgroups.append(_joinregexes(group))
1400 startidx = idx
1391 startidx = idx
1401 groupsize = 0
1392 groupsize = 0
1402 groupsize += piecesize + 1
1393 groupsize += piecesize + 1
1403
1394
1404 if startidx == 0:
1395 if startidx == 0:
1405 matcher = _rematcher(fullregexp)
1396 matcher = _rematcher(fullregexp)
1406 func = lambda s: bool(matcher(s))
1397 func = lambda s: bool(matcher(s))
1407 else:
1398 else:
1408 group = regexps[startidx:]
1399 group = regexps[startidx:]
1409 allgroups.append(_joinregexes(group))
1400 allgroups.append(_joinregexes(group))
1410 allmatchers = [_rematcher(g) for g in allgroups]
1401 allmatchers = [_rematcher(g) for g in allgroups]
1411 func = lambda s: any(m(s) for m in allmatchers)
1402 func = lambda s: any(m(s) for m in allmatchers)
1412 return fullregexp, func
1403 return fullregexp, func
1413 except re.error:
1404 except re.error:
1414 for k, p, s in kindpats:
1405 for k, p, s in kindpats:
1415 try:
1406 try:
1416 _rematcher(_regex(k, p, globsuffix))
1407 _rematcher(_regex(k, p, globsuffix))
1417 except re.error:
1408 except re.error:
1418 if s:
1409 if s:
1419 raise error.Abort(
1410 raise error.Abort(
1420 _(b"%s: invalid pattern (%s): %s") % (s, k, p)
1411 _(b"%s: invalid pattern (%s): %s") % (s, k, p)
1421 )
1412 )
1422 else:
1413 else:
1423 raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
1414 raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
1424 raise error.Abort(_(b"invalid pattern"))
1415 raise error.Abort(_(b"invalid pattern"))
1425
1416
1426
1417
1427 def _patternrootsanddirs(kindpats):
1418 def _patternrootsanddirs(kindpats):
1428 '''Returns roots and directories corresponding to each pattern.
1419 '''Returns roots and directories corresponding to each pattern.
1429
1420
1430 This calculates the roots and directories exactly matching the patterns and
1421 This calculates the roots and directories exactly matching the patterns and
1431 returns a tuple of (roots, dirs) for each. It does not return other
1422 returns a tuple of (roots, dirs) for each. It does not return other
1432 directories which may also need to be considered, like the parent
1423 directories which may also need to be considered, like the parent
1433 directories.
1424 directories.
1434 '''
1425 '''
1435 r = []
1426 r = []
1436 d = []
1427 d = []
1437 for kind, pat, source in kindpats:
1428 for kind, pat, source in kindpats:
1438 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1429 if kind in (b'glob', b'rootglob'): # find the non-glob prefix
1439 root = []
1430 root = []
1440 for p in pat.split(b'/'):
1431 for p in pat.split(b'/'):
1441 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1432 if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
1442 break
1433 break
1443 root.append(p)
1434 root.append(p)
1444 r.append(b'/'.join(root))
1435 r.append(b'/'.join(root))
1445 elif kind in (b'relpath', b'path'):
1436 elif kind in (b'relpath', b'path'):
1446 if pat == b'.':
1437 if pat == b'.':
1447 pat = b''
1438 pat = b''
1448 r.append(pat)
1439 r.append(pat)
1449 elif kind in (b'rootfilesin',):
1440 elif kind in (b'rootfilesin',):
1450 if pat == b'.':
1441 if pat == b'.':
1451 pat = b''
1442 pat = b''
1452 d.append(pat)
1443 d.append(pat)
1453 else: # relglob, re, relre
1444 else: # relglob, re, relre
1454 r.append(b'')
1445 r.append(b'')
1455 return r, d
1446 return r, d
1456
1447
1457
1448
1458 def _roots(kindpats):
1449 def _roots(kindpats):
1459 '''Returns root directories to match recursively from the given patterns.'''
1450 '''Returns root directories to match recursively from the given patterns.'''
1460 roots, dirs = _patternrootsanddirs(kindpats)
1451 roots, dirs = _patternrootsanddirs(kindpats)
1461 return roots
1452 return roots
1462
1453
1463
1454
1464 def _rootsdirsandparents(kindpats):
1455 def _rootsdirsandparents(kindpats):
1465 '''Returns roots and exact directories from patterns.
1456 '''Returns roots and exact directories from patterns.
1466
1457
1467 `roots` are directories to match recursively, `dirs` should
1458 `roots` are directories to match recursively, `dirs` should
1468 be matched non-recursively, and `parents` are the implicitly required
1459 be matched non-recursively, and `parents` are the implicitly required
1469 directories to walk to items in either roots or dirs.
1460 directories to walk to items in either roots or dirs.
1470
1461
1471 Returns a tuple of (roots, dirs, parents).
1462 Returns a tuple of (roots, dirs, parents).
1472
1463
1473 >>> r = _rootsdirsandparents(
1464 >>> r = _rootsdirsandparents(
1474 ... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
1465 ... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
1475 ... (b'glob', b'g*', b'')])
1466 ... (b'glob', b'g*', b'')])
1476 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1467 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1477 (['g/h', 'g/h', ''], []) ['', 'g']
1468 (['g/h', 'g/h', ''], []) ['', 'g']
1478 >>> r = _rootsdirsandparents(
1469 >>> r = _rootsdirsandparents(
1479 ... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
1470 ... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
1480 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1471 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1481 ([], ['g/h', '']) ['', 'g']
1472 ([], ['g/h', '']) ['', 'g']
1482 >>> r = _rootsdirsandparents(
1473 >>> r = _rootsdirsandparents(
1483 ... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
1474 ... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
1484 ... (b'path', b'', b'')])
1475 ... (b'path', b'', b'')])
1485 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1476 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1486 (['r', 'p/p', ''], []) ['', 'p']
1477 (['r', 'p/p', ''], []) ['', 'p']
1487 >>> r = _rootsdirsandparents(
1478 >>> r = _rootsdirsandparents(
1488 ... [(b'relglob', b'rg*', b''), (b're', b're/', b''),
1479 ... [(b'relglob', b'rg*', b''), (b're', b're/', b''),
1489 ... (b'relre', b'rr', b'')])
1480 ... (b'relre', b'rr', b'')])
1490 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1481 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
1491 (['', '', ''], []) ['']
1482 (['', '', ''], []) ['']
1492 '''
1483 '''
1493 r, d = _patternrootsanddirs(kindpats)
1484 r, d = _patternrootsanddirs(kindpats)
1494
1485
1495 p = set()
1486 p = set()
1496 # Add the parents as non-recursive/exact directories, since they must be
1487 # Add the parents as non-recursive/exact directories, since they must be
1497 # scanned to get to either the roots or the other exact directories.
1488 # scanned to get to either the roots or the other exact directories.
1498 p.update(pathutil.dirs(d))
1489 p.update(pathutil.dirs(d))
1499 p.update(pathutil.dirs(r))
1490 p.update(pathutil.dirs(r))
1500
1491
1501 # FIXME: all uses of this function convert these to sets, do so before
1492 # FIXME: all uses of this function convert these to sets, do so before
1502 # returning.
1493 # returning.
1503 # FIXME: all uses of this function do not need anything in 'roots' and
1494 # FIXME: all uses of this function do not need anything in 'roots' and
1504 # 'dirs' to also be in 'parents', consider removing them before returning.
1495 # 'dirs' to also be in 'parents', consider removing them before returning.
1505 return r, d, p
1496 return r, d, p
1506
1497
1507
1498
1508 def _explicitfiles(kindpats):
1499 def _explicitfiles(kindpats):
1509 '''Returns the potential explicit filenames from the patterns.
1500 '''Returns the potential explicit filenames from the patterns.
1510
1501
1511 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
1502 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
1512 ['foo/bar']
1503 ['foo/bar']
1513 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
1504 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
1514 []
1505 []
1515 '''
1506 '''
1516 # Keep only the pattern kinds where one can specify filenames (vs only
1507 # Keep only the pattern kinds where one can specify filenames (vs only
1517 # directory names).
1508 # directory names).
1518 filable = [kp for kp in kindpats if kp[0] not in (b'rootfilesin',)]
1509 filable = [kp for kp in kindpats if kp[0] not in (b'rootfilesin',)]
1519 return _roots(filable)
1510 return _roots(filable)
1520
1511
1521
1512
1522 def _prefix(kindpats):
1513 def _prefix(kindpats):
1523 '''Whether all the patterns match a prefix (i.e. recursively)'''
1514 '''Whether all the patterns match a prefix (i.e. recursively)'''
1524 for kind, pat, source in kindpats:
1515 for kind, pat, source in kindpats:
1525 if kind not in (b'path', b'relpath'):
1516 if kind not in (b'path', b'relpath'):
1526 return False
1517 return False
1527 return True
1518 return True
1528
1519
1529
1520
1530 _commentre = None
1521 _commentre = None
1531
1522
1532
1523
1533 def readpatternfile(filepath, warn, sourceinfo=False):
1524 def readpatternfile(filepath, warn, sourceinfo=False):
1534 '''parse a pattern file, returning a list of
1525 '''parse a pattern file, returning a list of
1535 patterns. These patterns should be given to compile()
1526 patterns. These patterns should be given to compile()
1536 to be validated and converted into a match function.
1527 to be validated and converted into a match function.
1537
1528
1538 trailing white space is dropped.
1529 trailing white space is dropped.
1539 the escape character is backslash.
1530 the escape character is backslash.
1540 comments start with #.
1531 comments start with #.
1541 empty lines are skipped.
1532 empty lines are skipped.
1542
1533
1543 lines can be of the following formats:
1534 lines can be of the following formats:
1544
1535
1545 syntax: regexp # defaults following lines to non-rooted regexps
1536 syntax: regexp # defaults following lines to non-rooted regexps
1546 syntax: glob # defaults following lines to non-rooted globs
1537 syntax: glob # defaults following lines to non-rooted globs
1547 re:pattern # non-rooted regular expression
1538 re:pattern # non-rooted regular expression
1548 glob:pattern # non-rooted glob
1539 glob:pattern # non-rooted glob
1549 rootglob:pat # rooted glob (same root as ^ in regexps)
1540 rootglob:pat # rooted glob (same root as ^ in regexps)
1550 pattern # pattern of the current default type
1541 pattern # pattern of the current default type
1551
1542
1552 if sourceinfo is set, returns a list of tuples:
1543 if sourceinfo is set, returns a list of tuples:
1553 (pattern, lineno, originalline).
1544 (pattern, lineno, originalline).
1554 This is useful to debug ignore patterns.
1545 This is useful to debug ignore patterns.
1555 '''
1546 '''
1556
1547
1557 if rustmod is not None:
1558 result, warnings = rustmod.read_pattern_file(
1559 filepath, bool(warn), sourceinfo,
1560 )
1561
1562 for warning_params in warnings:
1563 # Can't be easily emitted from Rust, because it would require
1564 # a mechanism for both gettext and calling the `warn` function.
1565 warn(_(b"%s: ignoring invalid syntax '%s'\n") % warning_params)
1566
1567 return result
1568
1569 syntaxes = {
1548 syntaxes = {
1570 b're': b'relre:',
1549 b're': b'relre:',
1571 b'regexp': b'relre:',
1550 b'regexp': b'relre:',
1572 b'glob': b'relglob:',
1551 b'glob': b'relglob:',
1573 b'rootglob': b'rootglob:',
1552 b'rootglob': b'rootglob:',
1574 b'include': b'include',
1553 b'include': b'include',
1575 b'subinclude': b'subinclude',
1554 b'subinclude': b'subinclude',
1576 }
1555 }
1577 syntax = b'relre:'
1556 syntax = b'relre:'
1578 patterns = []
1557 patterns = []
1579
1558
1580 fp = open(filepath, b'rb')
1559 fp = open(filepath, b'rb')
1581 for lineno, line in enumerate(util.iterfile(fp), start=1):
1560 for lineno, line in enumerate(util.iterfile(fp), start=1):
1582 if b"#" in line:
1561 if b"#" in line:
1583 global _commentre
1562 global _commentre
1584 if not _commentre:
1563 if not _commentre:
1585 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1564 _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
1586 # remove comments prefixed by an even number of escapes
1565 # remove comments prefixed by an even number of escapes
1587 m = _commentre.search(line)
1566 m = _commentre.search(line)
1588 if m:
1567 if m:
1589 line = line[: m.end(1)]
1568 line = line[: m.end(1)]
1590 # fixup properly escaped comments that survived the above
1569 # fixup properly escaped comments that survived the above
1591 line = line.replace(b"\\#", b"#")
1570 line = line.replace(b"\\#", b"#")
1592 line = line.rstrip()
1571 line = line.rstrip()
1593 if not line:
1572 if not line:
1594 continue
1573 continue
1595
1574
1596 if line.startswith(b'syntax:'):
1575 if line.startswith(b'syntax:'):
1597 s = line[7:].strip()
1576 s = line[7:].strip()
1598 try:
1577 try:
1599 syntax = syntaxes[s]
1578 syntax = syntaxes[s]
1600 except KeyError:
1579 except KeyError:
1601 if warn:
1580 if warn:
1602 warn(
1581 warn(
1603 _(b"%s: ignoring invalid syntax '%s'\n") % (filepath, s)
1582 _(b"%s: ignoring invalid syntax '%s'\n") % (filepath, s)
1604 )
1583 )
1605 continue
1584 continue
1606
1585
1607 linesyntax = syntax
1586 linesyntax = syntax
1608 for s, rels in pycompat.iteritems(syntaxes):
1587 for s, rels in pycompat.iteritems(syntaxes):
1609 if line.startswith(rels):
1588 if line.startswith(rels):
1610 linesyntax = rels
1589 linesyntax = rels
1611 line = line[len(rels) :]
1590 line = line[len(rels) :]
1612 break
1591 break
1613 elif line.startswith(s + b':'):
1592 elif line.startswith(s + b':'):
1614 linesyntax = rels
1593 linesyntax = rels
1615 line = line[len(s) + 1 :]
1594 line = line[len(s) + 1 :]
1616 break
1595 break
1617 if sourceinfo:
1596 if sourceinfo:
1618 patterns.append((linesyntax + line, lineno, line))
1597 patterns.append((linesyntax + line, lineno, line))
1619 else:
1598 else:
1620 patterns.append(linesyntax + line)
1599 patterns.append(linesyntax + line)
1621 fp.close()
1600 fp.close()
1622 return patterns
1601 return patterns
@@ -1,72 +1,44 b''
1 // ancestors.rs
1 // ancestors.rs
2 //
2 //
3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Bindings for Rust errors
8 //! Bindings for Rust errors
9 //!
9 //!
10 //! [`GraphError`] exposes `hg::GraphError` as a subclass of `ValueError`
10 //! [`GraphError`] exposes `hg::GraphError` as a subclass of `ValueError`
11 //! but some variants of `hg::GraphError` can be converted directly to other
11 //! but some variants of `hg::GraphError` can be converted directly to other
12 //! existing Python exceptions if appropriate.
12 //! existing Python exceptions if appropriate.
13 //!
13 //!
14 //! [`GraphError`]: struct.GraphError.html
14 //! [`GraphError`]: struct.GraphError.html
15 use cpython::{
15 use cpython::{
16 exc::{IOError, RuntimeError, ValueError},
16 exc::{RuntimeError, ValueError},
17 py_exception, PyErr, Python,
17 py_exception, PyErr, Python,
18 };
18 };
19 use hg;
19 use hg;
20
20
21 py_exception!(rustext, GraphError, ValueError);
21 py_exception!(rustext, GraphError, ValueError);
22
22
23 impl GraphError {
23 impl GraphError {
24 pub fn pynew(py: Python, inner: hg::GraphError) -> PyErr {
24 pub fn pynew(py: Python, inner: hg::GraphError) -> PyErr {
25 match inner {
25 match inner {
26 hg::GraphError::ParentOutOfRange(r) => {
26 hg::GraphError::ParentOutOfRange(r) => {
27 GraphError::new(py, ("ParentOutOfRange", r))
27 GraphError::new(py, ("ParentOutOfRange", r))
28 }
28 }
29 hg::GraphError::WorkingDirectoryUnsupported => {
29 hg::GraphError::WorkingDirectoryUnsupported => {
30 match py
30 match py
31 .import("mercurial.error")
31 .import("mercurial.error")
32 .and_then(|m| m.get(py, "WdirUnsupported"))
32 .and_then(|m| m.get(py, "WdirUnsupported"))
33 {
33 {
34 Err(e) => e,
34 Err(e) => e,
35 Ok(cls) => PyErr::from_instance(py, cls),
35 Ok(cls) => PyErr::from_instance(py, cls),
36 }
36 }
37 }
37 }
38 }
38 }
39 }
39 }
40 }
40 }
41
41
42 py_exception!(rustext, PatternError, RuntimeError);
43 py_exception!(rustext, PatternFileError, RuntimeError);
44 py_exception!(rustext, HgPathPyError, RuntimeError);
42 py_exception!(rustext, HgPathPyError, RuntimeError);
45
43
46 impl PatternError {
47 pub fn pynew(py: Python, inner: hg::PatternError) -> PyErr {
48 match inner {
49 hg::PatternError::UnsupportedSyntax(m) => {
50 PatternError::new(py, ("PatternError", m))
51 }
52 }
53 }
54 }
55
56 impl PatternFileError {
57 pub fn pynew(py: Python, inner: hg::PatternFileError) -> PyErr {
58 match inner {
59 hg::PatternFileError::IO(e) => {
60 let value = (e.raw_os_error().unwrap_or(2), e.to_string());
61 PyErr::new::<IOError, _>(py, value)
62 }
63 hg::PatternFileError::Pattern(e, l) => match e {
64 hg::PatternError::UnsupportedSyntax(m) => {
65 PatternFileError::new(py, ("PatternFileError", m, l))
66 }
67 },
68 }
69 }
70 }
71
72 py_exception!(shared_ref, AlreadyBorrowed, RuntimeError);
44 py_exception!(shared_ref, AlreadyBorrowed, RuntimeError);
@@ -1,83 +1,67 b''
1 // lib.rs
1 // lib.rs
2 //
2 //
3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Python bindings of `hg-core` objects using the `cpython` crate.
8 //! Python bindings of `hg-core` objects using the `cpython` crate.
9 //! Once compiled, the resulting single shared library object can be placed in
9 //! Once compiled, the resulting single shared library object can be placed in
10 //! the `mercurial` package directly as `rustext.so` or `rustext.dll`.
10 //! the `mercurial` package directly as `rustext.so` or `rustext.dll`.
11 //! It holds several modules, so that from the point of view of Python,
11 //! It holds several modules, so that from the point of view of Python,
12 //! it behaves as the `cext` package.
12 //! it behaves as the `cext` package.
13 //!
13 //!
14 //! Example:
14 //! Example:
15 //!
15 //!
16 //! ```text
16 //! ```text
17 //! >>> from mercurial.rustext import ancestor
17 //! >>> from mercurial.rustext import ancestor
18 //! >>> ancestor.__doc__
18 //! >>> ancestor.__doc__
19 //! 'Generic DAG ancestor algorithms - Rust implementation'
19 //! 'Generic DAG ancestor algorithms - Rust implementation'
20 //! ```
20 //! ```
21
21
22 /// This crate uses nested private macros, `extern crate` is still needed in
22 /// This crate uses nested private macros, `extern crate` is still needed in
23 /// 2018 edition.
23 /// 2018 edition.
24 #[macro_use]
24 #[macro_use]
25 extern crate cpython;
25 extern crate cpython;
26
26
27 pub mod ancestors;
27 pub mod ancestors;
28 mod cindex;
28 mod cindex;
29 mod conversion;
29 mod conversion;
30 #[macro_use]
30 #[macro_use]
31 pub mod ref_sharing;
31 pub mod ref_sharing;
32 pub mod dagops;
32 pub mod dagops;
33 pub mod dirstate;
33 pub mod dirstate;
34 pub mod discovery;
34 pub mod discovery;
35 pub mod exceptions;
35 pub mod exceptions;
36 pub mod filepatterns;
37 pub mod parsers;
36 pub mod parsers;
38 pub mod revlog;
37 pub mod revlog;
39 pub mod utils;
38 pub mod utils;
40
39
41 py_module_initializer!(rustext, initrustext, PyInit_rustext, |py, m| {
40 py_module_initializer!(rustext, initrustext, PyInit_rustext, |py, m| {
42 m.add(
41 m.add(
43 py,
42 py,
44 "__doc__",
43 "__doc__",
45 "Mercurial core concepts - Rust implementation",
44 "Mercurial core concepts - Rust implementation",
46 )?;
45 )?;
47
46
48 let dotted_name: String = m.get(py, "__name__")?.extract(py)?;
47 let dotted_name: String = m.get(py, "__name__")?.extract(py)?;
49 m.add(py, "ancestor", ancestors::init_module(py, &dotted_name)?)?;
48 m.add(py, "ancestor", ancestors::init_module(py, &dotted_name)?)?;
50 m.add(py, "dagop", dagops::init_module(py, &dotted_name)?)?;
49 m.add(py, "dagop", dagops::init_module(py, &dotted_name)?)?;
51 m.add(py, "discovery", discovery::init_module(py, &dotted_name)?)?;
50 m.add(py, "discovery", discovery::init_module(py, &dotted_name)?)?;
52 m.add(py, "dirstate", dirstate::init_module(py, &dotted_name)?)?;
51 m.add(py, "dirstate", dirstate::init_module(py, &dotted_name)?)?;
53 m.add(py, "revlog", revlog::init_module(py, &dotted_name)?)?;
52 m.add(py, "revlog", revlog::init_module(py, &dotted_name)?)?;
54 m.add(
53 m.add(
55 py,
54 py,
56 "filepatterns",
57 filepatterns::init_module(py, &dotted_name)?,
58 )?;
59 m.add(
60 py,
61 "parsers",
55 "parsers",
62 parsers::init_parsers_module(py, &dotted_name)?,
56 parsers::init_parsers_module(py, &dotted_name)?,
63 )?;
57 )?;
64 m.add(py, "GraphError", py.get_type::<exceptions::GraphError>())?;
58 m.add(py, "GraphError", py.get_type::<exceptions::GraphError>())?;
65 m.add(
66 py,
67 "PatternFileError",
68 py.get_type::<exceptions::PatternFileError>(),
69 )?;
70 m.add(
71 py,
72 "PatternError",
73 py.get_type::<exceptions::PatternError>(),
74 )?;
75 Ok(())
59 Ok(())
76 });
60 });
77
61
78 #[cfg(not(any(feature = "python27-bin", feature = "python3-bin")))]
62 #[cfg(not(any(feature = "python27-bin", feature = "python3-bin")))]
79 #[test]
63 #[test]
80 #[ignore]
64 #[ignore]
81 fn libpython_must_be_linked_to_run_tests() {
65 fn libpython_must_be_linked_to_run_tests() {
82 // stub function to tell that some tests wouldn't run
66 // stub function to tell that some tests wouldn't run
83 }
67 }
1 NO CONTENT: file was removed
NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now