##// END OF EJS Templates
rust-filepatterns: call new Rust implementations from Python...
Raphaël Gomès -
r42516:c7652f74 default
parent child Browse files
Show More
@@ -1,1476 +1,1511 b''
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import copy
10 import copy
11 import itertools
11 import itertools
12 import os
12 import os
13 import re
13 import re
14
14
15 from .i18n import _
15 from .i18n import _
16 from . import (
16 from . import (
17 encoding,
17 encoding,
18 error,
18 error,
19 pathutil,
19 pathutil,
20 pycompat,
20 pycompat,
21 util,
21 util,
22 )
22 )
23 from .utils import (
23 from .utils import (
24 stringutil,
24 stringutil,
25 )
25 )
26
26
27 try:
28 from . import rustext
29 rustext.__name__ # force actual import (see hgdemandimport)
30 except ImportError:
31 rustext = None
32
# Names of every pattern kind (the 'kind' in a 'kind:pattern' string) known to
# this module.
allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                   'rootglob',
                   'listfile', 'listfile0', 'set', 'include', 'subinclude',
                   'rootfilesin')
# Kinds whose pattern is canonicalized against the current working directory
# (rather than the repository root) by _donormalize().
cwdrelativepatternkinds = ('relpath', 'glob')
32
38
# Module-level alias so the classes below can write '@propertycache'.
propertycache = util.propertycache
34
40
def _rematcher(regex):
    '''compile the regexp with the best available regexp engine and return a
    matcher function

    The returned callable is the compiled pattern's ``test_match`` method
    when it has one, and its ``match`` method otherwise.
    '''
    m = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        # compiled pattern has no test_match attribute: use plain match
        return m.match
44
50
def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
    '''Returns the kindpats list with the 'set' patterns expanded to matchers

    The result is a ``(matchers, other)`` pair: ``matchers`` holds one
    matcher per 'set' pattern (built with ``ctx.matchfileset``), plus, when
    ``listsubrepos`` is true, one prefixdirmatcher per entry of
    ``ctx.substate``; ``other`` holds the remaining (kind, pat, source)
    tuples in their original order.

    Raises error.ProgrammingError if a 'set' pattern is seen while ``ctx``
    is None.
    '''
    matchers = []
    other = []

    for kind, pat, source in kindpats:
        if kind == 'set':
            if ctx is None:
                raise error.ProgrammingError("fileset expression with no "
                                             "context")
            matchers.append(ctx.matchfileset(pat, badfn=badfn))

            if listsubrepos:
                # evaluate the same fileset inside each subrepo and re-root
                # the resulting matcher under the subrepo path
                for subpath in ctx.substate:
                    sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
                    pm = prefixdirmatcher(subpath, sm, badfn=badfn)
                    matchers.append(pm)

            continue
        other.append((kind, pat, source))
    return matchers, other
66
72
def _expandsubinclude(kindpats, root):
    '''Returns the list of subinclude matcher args and the kindpats without the
    subincludes in it.

    The result is a ``(relmatchers, other)`` pair. Each entry of
    ``relmatchers`` is ``(prefix, matcherargs)`` where ``matcherargs`` is the
    tuple ``(newroot, '', [], ['include:<path>'])`` and ``prefix`` is the
    canonical path of ``newroot`` relative to ``root`` with a trailing '/'
    (or '' when that canonical path is empty). ``other`` holds every
    non-'subinclude' (kind, pat, source) tuple unchanged.
    '''
    relmatchers = []
    other = []

    for kind, pat, source in kindpats:
        if kind == 'subinclude':
            # the pattern file path is taken relative to the directory of
            # the file that referenced it
            sourceroot = pathutil.dirname(util.normpath(source))
            pat = util.pconvert(pat)
            path = pathutil.join(sourceroot, pat)

            newroot = pathutil.dirname(path)
            matcherargs = (newroot, '', [], ['include:%s' % path])

            prefix = pathutil.canonpath(root, root, newroot)
            if prefix:
                prefix += '/'
            relmatchers.append((prefix, matcherargs))
        else:
            other.append((kind, pat, source))

    return relmatchers, other
90
96
def _kindpatsalwaysmatch(kindpats):
    """Checks whether the kindpats match everything, as e.g.
    'relpath:.' does.

    Returns True only when every (kind, pat, source) entry has an empty
    pattern and a kind of 'relpath' or 'glob'. An empty kindpats list
    therefore also yields True.
    """
    for kind, pat, source in kindpats:
        if pat != '' or kind not in ['relpath', 'glob']:
            return False
    return True
99
105
def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    '''Build a single matcher for the given kindpats.

    'set' patterns are split out by _expandsets() into fileset matchers; the
    remaining kindpats (if any) are handed to ``matchercls(root, kindpats,
    badfn=badfn)``. The result is:

    - a nevermatcher when nothing was produced,
    - the sole matcher when exactly one was produced,
    - a unionmatcher over all of them otherwise.
    '''
    matchers = []
    fms, kindpats = _expandsets(kindpats, ctx=ctx,
                                listsubrepos=listsubrepos, badfn=badfn)
    if kindpats:
        m = matchercls(root, kindpats, badfn=badfn)
        matchers.append(m)
    if fms:
        matchers.extend(fms)
    if not matchers:
        return nevermatcher(badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)
115
121
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Usually a patternmatcher is returned:
    >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> match(b'foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> match(b'foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    normalize = _donormalize
    if icasefs:
        # NOTE: this branch dereferences ctx, so icasefs=True requires a ctx
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in ('re', 'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(patternmatcher, root, kindpats, ctx=ctx,
                                      listsubrepos=listsubrepos, badfn=badfn)
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)

    # include/exclude patterns always default to 'glob' and are built without
    # a bad() callback of their own
    if include:
        kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
                                   listsubrepos=listsubrepos, badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(includematcher, root, kindpats, ctx=ctx,
                                   listsubrepos=listsubrepos, badfn=None)
        m = differencematcher(m, em)
    return m
241
247
def exact(files, badfn=None):
    '''Return an exactmatcher built from files, with optional bad() callback.'''
    return exactmatcher(files, badfn=badfn)
244
250
def always(badfn=None):
    '''Return an alwaysmatcher, with optional bad() callback.'''
    return alwaysmatcher(badfn)
247
253
def never(badfn=None):
    '''Return a nevermatcher, with optional bad() callback.'''
    return nevermatcher(badfn)
250
256
def badmatch(match, badfn):
    """Make a copy of the given matcher, replacing its bad method with the given
    one.

    The copy is shallow (copy.copy); the matcher passed in is left untouched.
    """
    m = copy.copy(match)
    m.bad = badfn
    return m
258
264
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat, source) tuples. ``source`` is '' for
    patterns given directly, the list-file path for patterns read through
    'listfile'/'listfile0', and the nested source (or the include path) for
    patterns read through 'include'.

    Raises error.Abort when a list file cannot be read or when reading an
    included pattern file aborts; an unreadable include file (IOError) is
    only reported through ``warn`` (if given) and otherwise skipped.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # every pattern read from the list file records that file as
            # its source
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # exceptions are not subscriptable on Python 3, so read the
                # message through .args rather than inst[0]
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, stringutil.forcebytestr(inst.strerror)))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
299
305
class basematcher(object):
    '''Base class for matchers.

    The defaults defined here describe a matcher that matches no file
    (matchfn() returns False, files() is empty) but does not prune any
    directory (visitdir() returns True); subclasses override what they need.
    '''

    def __init__(self, badfn=None):
        # only shadow the class-level bad() when a callback was supplied
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '.' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return 'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not self.always() and not self.isexact() and not self.prefix()
418
424
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    # every directory, and everything below it, is of interest
    def visitdir(self, dir):
        return 'all'

    def visitchildrenset(self, dir):
        return 'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
439
445
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    # no directory can contain a match, so nothing needs to be visited
    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
465
471
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function

    ``predfn`` is installed as this instance's matchfn; ``predrepr`` is an
    optional description used when building the repr.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        # fall back to the repr of the predicate itself when no (non-empty)
        # predrepr description is available
        s = (stringutil.buildrepr(self._predrepr)
             or pycompat.byterepr(self.matchfn))
        # NOTE(review): 'predicatenmatcher' looks like a typo, but the string
        # is emitted at runtime, so it is kept byte-for-byte here.
        return '<predicatenmatcher pred=%s>' % s
479
485
class patternmatcher(basematcher):
    """Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['.', 'foo/a', 'b', '.']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        # '$' is handed to _buildmatch as the suffix for the built patterns.
        self._pats, self.matchfn = _buildmatch(kindpats, '$', root)

    @propertycache
    def _dirs(self):
        # Every directory derived from the explicit files, plus the root.
        founddirs = set(util.dirs(self._fileset))
        return founddirs | {'.'}

    def visitdir(self, dir):
        '''Return 'all', True or False for directory ``dir``.

        'all' is returned only when this is a prefix matcher and ``dir`` is
        itself one of the explicit files. Otherwise the result is True when
        the root is listed, when ``dir`` is a listed file or one of the
        derived directories, or when any parent of ``dir`` is listed.
        '''
        listed = self._fileset
        if self._prefix and dir in listed:
            return 'all'
        if '.' in listed or dir in listed or dir in self._dirs:
            return True
        return any(parent in listed for parent in util.finddirs(dir))

    def visitchildrenset(self, dir):
        '''Translate the visitdir() answer into visitchildrenset() form.'''
        visit = self.visitdir(dir)
        if not visit:
            return set()
        if visit is True:
            return 'this'
        assert visit == 'all'
        return 'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        shown = pycompat.bytestr(self._pats)
        return '<patternmatcher patterns=%r>' % shown
548
554
549 # This is basically a reimplementation of util.dirs that stores the children
555 # This is basically a reimplementation of util.dirs that stores the children
550 # instead of just a count of them, plus a small optional optimization to avoid
556 # instead of just a count of them, plus a small optional optimization to avoid
551 # some directories we don't need.
557 # some directories we don't need.
552 class _dirchildren(object):
558 class _dirchildren(object):
553 def __init__(self, paths, onlyinclude=None):
559 def __init__(self, paths, onlyinclude=None):
554 self._dirs = {}
560 self._dirs = {}
555 self._onlyinclude = onlyinclude or []
561 self._onlyinclude = onlyinclude or []
556 addpath = self.addpath
562 addpath = self.addpath
557 for f in paths:
563 for f in paths:
558 addpath(f)
564 addpath(f)
559
565
560 def addpath(self, path):
566 def addpath(self, path):
561 if path == '.':
567 if path == '.':
562 return
568 return
563 dirs = self._dirs
569 dirs = self._dirs
564 findsplitdirs = _dirchildren._findsplitdirs
570 findsplitdirs = _dirchildren._findsplitdirs
565 for d, b in findsplitdirs(path):
571 for d, b in findsplitdirs(path):
566 if d not in self._onlyinclude:
572 if d not in self._onlyinclude:
567 continue
573 continue
568 dirs.setdefault(d, set()).add(b)
574 dirs.setdefault(d, set()).add(b)
569
575
570 @staticmethod
576 @staticmethod
571 def _findsplitdirs(path):
577 def _findsplitdirs(path):
572 # yields (dirname, basename) tuples, walking back to the root. This is
578 # yields (dirname, basename) tuples, walking back to the root. This is
573 # very similar to util.finddirs, except:
579 # very similar to util.finddirs, except:
574 # - produces a (dirname, basename) tuple, not just 'dirname'
580 # - produces a (dirname, basename) tuple, not just 'dirname'
575 # - includes root dir
581 # - includes root dir
576 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
582 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
577 # slash, and produces '.' for the root instead of ''.
583 # slash, and produces '.' for the root instead of ''.
578 oldpos = len(path)
584 oldpos = len(path)
579 pos = path.rfind('/')
585 pos = path.rfind('/')
580 while pos != -1:
586 while pos != -1:
581 yield path[:pos], path[pos + 1:oldpos]
587 yield path[:pos], path[pos + 1:oldpos]
582 oldpos = pos
588 oldpos = pos
583 pos = path.rfind('/', 0, pos)
589 pos = path.rfind('/', 0, pos)
584 yield '.', path[:oldpos]
590 yield '.', path[:oldpos]
585
591
586 def get(self, path):
592 def get(self, path):
587 return self._dirs.get(path, set())
593 return self._dirs.get(path, set())
588
594
class includematcher(basematcher):
    '''Matcher built from (kind, pat, source) patterns against 'root'.

    The match function comes from _buildmatch() with a '(?:/|$)' suffix.
    Three directory sets from _rootsdirsandparents() drive the
    directory-visiting answers.
    '''

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots: directories which are recursively included.
        self._roots = set(roots)
        # dirs: directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents: directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = set(parents)

    def visitdir(self, dir):
        '''Return 'all', True or False for directory ``dir``.'''
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        if '.' in roots or dir in roots:
            return True
        if dir in self._dirs or dir in self._parents:
            return True
        return any(parent in roots for parent in util.finddirs(dir))

    @propertycache
    def _allparentschildren(self):
        # Adding dirs, roots and parents and then restricting to parents only
        # may look odd. It handles the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # If only self._parents had been added, asking for the children of
        # 'foo' could not produce ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        '''Return 'all', 'this', or a set of child names for ``dir``.'''
        roots = self._roots
        if self._prefix and dir in roots:
            return 'all'
        # Note: the 'dir in self._parents' case from visitdir is deliberately
        # left out of this test; it is handled separately below.
        if ('.' in roots
            or dir in roots
            or dir in self._dirs
            or any(parent in roots for parent in util.finddirs(dir))):
            return 'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        shown = pycompat.bytestr(self._pats)
        return '<includematcher includes=%r>' % shown
646
652
class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # A list is stored as given; any other iterable is materialized.
        self._files = files if isinstance(files, list) else list(files)

    # Matching a file is the same as an exact lookup.
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # Every directory derived from the files, plus the root.
        founddirs = set(util.dirs(self._fileset))
        return founddirs | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        '''Return the names of the direct children of ``dir`` to visit.'''
        if not self._fileset or dir not in self._dirs:
            return set()

        candidates = self._fileset | (self._dirs - {'.'})
        if dir == '.':
            relative = candidates
        else:
            # Keep only entries below ``dir`` and make them relative to it.
            below = dir + '/'
            cut = len(below)
            relative = set(c[cut:] for c in candidates
                           if c.startswith(below))
        # self._dirs holds every directory recursively, so for a file
        # foo/bar/baz.txt it contains '.', 'foo' and 'foo/bar'. A candidate
        # that still has a '/' in it belongs to a subdir-of-a-subdir and can
        # be dropped: the immediate subdir is present without a slash.
        children = {c for c in relative if '/' not in c}
        # An empty result is not expected here: it would mean _dirs holds a
        # directory that has no file in _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
711
717
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        '''Return 'all', True or False for directory ``dir``.'''
        # Ask m2 once; its answer drives both checks below.
        visit2 = self._m2.visitdir(dir)
        if visit2 == 'all':
            # m2 reports 'all' for dir, so the difference has nothing there.
            return False
        if not visit2:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        # m2 reported something under dir, so never pass 'all' through.
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        '''Return 'all', 'this', or a set of child names for ``dir``.'''
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == 'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in ['all', 'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return 'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset('.'):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
780
786
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.

    When either argument is None the other one is returned as is. When one
    side always matches, a shallow copy of the other side is returned
    instead of building an intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        # m2 alone decides the matching, but the non-matching attributes
        # still have to come from m1.
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        combined = copy.copy(m2)
        combined.bad = m1.bad
        combined.explicitdir = m1.explicitdir
        combined.traversedir = m1.traversedir
        return combined

    if m2.always():
        # m1 already carries its own attributes; a plain copy is enough.
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
801
807
class intersectionmatcher(basematcher):
    '''Matches a file only when both wrapped matchers match it.

    The non-matching-attributes (bad, explicitdir, traversedir) are taken
    from the first matcher.
    '''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        first, second = self._m1, self._m2
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return first.files() + second.files()
        # Enumerate the files of an exact side and filter them through the
        # other matcher.
        if not first.isexact():
            first, second = second, first
        return [f for f in first.files() if second(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        '''Return 'all', True or False for directory ``dir``.'''
        visit1 = self._m1.visitdir(dir)
        if not visit1:
            # m1 rules the directory out; m2 is not consulted.
            return False
        visit2 = self._m2.visitdir(dir)
        if visit1 == 'all':
            return visit2
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(visit2)

    def visitchildrenset(self, dir):
        '''Return 'all', 'this', or a set of child names for ``dir``.'''
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # When one side reports 'all', the other side's answer is the result.
        if m1_set == 'all':
            return m2_set
        if m2_set == 'all':
            return m1_set

        if 'this' in (m1_set, m2_set):
            return 'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set.intersection(m2_set)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
862
868
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files that live below ``path``,
        # with the "<path>/" prefix stripped.
        below = path + "/"
        cut = len(path) + 1
        self._files = [f[cut:] for f in matcher._files
                       if f.startswith(below)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        # Report against the wrapped matcher using the full path.
        self._matcher.bad(self._path + "/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so the
        # matching function for the subdirectory cannot be rebuilt accurately
        # from the inputs. Instead matchfn() and visitdir() are overridden to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        # '.' is this matcher's own root, i.e. the subdirectory itself.
        target = self._path if dir == '.' else self._path + "/" + dir
        return self._matcher.visitdir(target)

    def visitchildrenset(self, dir):
        # Same remapping as visitdir().
        target = self._path if dir == '.' else self._path + "/" + dir
        return self._matcher.visitchildrenset(target)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
938
944
939 class prefixdirmatcher(basematcher):
945 class prefixdirmatcher(basematcher):
940 """Adapt a matcher to work on a parent directory.
946 """Adapt a matcher to work on a parent directory.
941
947
942 The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
948 The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
943 ignored.
949 ignored.
944
950
945 The prefix path should usually be the relative path from the root of
951 The prefix path should usually be the relative path from the root of
946 this matcher to the root of the wrapped matcher.
952 this matcher to the root of the wrapped matcher.
947
953
948 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
954 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
949 >>> m2 = prefixdirmatcher(b'd/e', m1)
955 >>> m2 = prefixdirmatcher(b'd/e', m1)
950 >>> m2(b'a.txt')
956 >>> m2(b'a.txt')
951 False
957 False
952 >>> m2(b'd/e/a.txt')
958 >>> m2(b'd/e/a.txt')
953 True
959 True
954 >>> m2(b'd/e/b.txt')
960 >>> m2(b'd/e/b.txt')
955 False
961 False
956 >>> m2.files()
962 >>> m2.files()
957 ['d/e/a.txt', 'd/e/f/b.txt']
963 ['d/e/a.txt', 'd/e/f/b.txt']
958 >>> m2.exact(b'd/e/a.txt')
964 >>> m2.exact(b'd/e/a.txt')
959 True
965 True
960 >>> m2.visitdir(b'd')
966 >>> m2.visitdir(b'd')
961 True
967 True
962 >>> m2.visitdir(b'd/e')
968 >>> m2.visitdir(b'd/e')
963 True
969 True
964 >>> m2.visitdir(b'd/e/f')
970 >>> m2.visitdir(b'd/e/f')
965 True
971 True
966 >>> m2.visitdir(b'd/e/g')
972 >>> m2.visitdir(b'd/e/g')
967 False
973 False
968 >>> m2.visitdir(b'd/ef')
974 >>> m2.visitdir(b'd/ef')
969 False
975 False
970 """
976 """
971
977
972 def __init__(self, path, matcher, badfn=None):
978 def __init__(self, path, matcher, badfn=None):
973 super(prefixdirmatcher, self).__init__(badfn)
979 super(prefixdirmatcher, self).__init__(badfn)
974 if not path:
980 if not path:
975 raise error.ProgrammingError('prefix path must not be empty')
981 raise error.ProgrammingError('prefix path must not be empty')
976 self._path = path
982 self._path = path
977 self._pathprefix = path + '/'
983 self._pathprefix = path + '/'
978 self._matcher = matcher
984 self._matcher = matcher
979
985
980 @propertycache
986 @propertycache
981 def _files(self):
987 def _files(self):
982 return [self._pathprefix + f for f in self._matcher._files]
988 return [self._pathprefix + f for f in self._matcher._files]
983
989
984 def matchfn(self, f):
990 def matchfn(self, f):
985 if not f.startswith(self._pathprefix):
991 if not f.startswith(self._pathprefix):
986 return False
992 return False
987 return self._matcher.matchfn(f[len(self._pathprefix):])
993 return self._matcher.matchfn(f[len(self._pathprefix):])
988
994
989 @propertycache
995 @propertycache
990 def _pathdirs(self):
996 def _pathdirs(self):
991 return set(util.finddirs(self._path)) | {'.'}
997 return set(util.finddirs(self._path)) | {'.'}
992
998
def visitdir(self, dir):
    """Tell whether ``dir`` should be visited.

    The prefix directory itself maps to '.' in the wrapped matcher and
    directories below it are passed on with the prefix stripped. Any other
    directory is visited only if it is one of ``self._pathdirs``.
    """
    inner = self._matcher
    if dir == self._path:
        return inner.visitdir('.')
    prefix = self._pathprefix
    if not dir.startswith(prefix):
        return dir in self._pathdirs
    return inner.visitdir(dir[len(prefix):])
999
1005
def visitchildrenset(self, dir):
    """Return the wrapped matcher's answer for ``dir``, prefix-adjusted.

    The prefix directory is queried as '.', directories below it with the
    prefix stripped. For directories outside the prefix the result is
    'this' when ``dir`` is in ``self._pathdirs`` and an empty set otherwise.
    """
    inner = self._matcher
    if dir == self._path:
        return inner.visitchildrenset('.')
    prefix = self._pathprefix
    if dir.startswith(prefix):
        return inner.visitchildrenset(dir[len(prefix):])
    return 'this' if dir in self._pathdirs else set()
1008
1014
def isexact(self):
    """Report whatever the wrapped matcher's isexact() reports."""
    inner = self._matcher
    return inner.isexact()
1011
1017
def prefix(self):
    """Report whatever the wrapped matcher's prefix() reports."""
    inner = self._matcher
    return inner.prefix()
1014
1020
@encoding.strmethod
def __repr__(self):
    """Debug representation showing the prefix and the wrapped matcher."""
    path = pycompat.bytestr(self._path)
    return '<prefixdirmatcher path=%r, matcher=%r>' % (path, self._matcher)
1019
1025
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    A file matches as soon as any of the given matchers matches it. The
    explicitdir and traversedir attributes are taken from the first
    matcher; the base class is initialized without a bad-file callback.
    """

    def __init__(self, matchers):
        """Combine ``matchers``, a non-empty sequence of matcher objects."""
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.explicitdir = first.explicitdir
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        """Return True if at least one of the matchers accepts ``f``."""
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        """Combine the visitdir() answers of all matchers for ``dir``.

        'all' from any matcher is returned immediately; otherwise the
        individual answers are or-ed together.
        """
        combined = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == 'all':
                return answer
            combined |= answer
        return combined

    def visitchildrenset(self, dir):
        """Combine the visitchildrenset() answers of all matchers.

        'all' wins over everything, then 'this'; otherwise the result is
        the union of the sets returned by the individual matchers.
        """
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == 'all':
                return answer
            if sawthis or answer == 'this':
                sawthis = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(answer, set)
            children.update(answer)
        if sawthis:
            return 'this'
        return children

    @encoding.strmethod
    def __repr__(self):
        # Wrap in a 1-tuple: formatting directly with a tuple of matchers
        # would spread its items over the format string instead of
        # printing the container.
        return '<unionmatcher matchers=%r>' % (self._matchers,)
1071
1077
def patkind(pattern, default=None):
    '''Return the kind of a 'kind:pat' pattern, or default.

    The kind is only recognized when it is one of the known pattern kinds;
    otherwise ``default`` is returned.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    kind, unused = _patsplit(pattern, default)
    return kind
1086
1092
def _patsplit(pattern, default):
    """Split a pattern string into (kind, pattern).

    When the text before the first ':' is a known pattern kind, that kind
    and the remainder are returned. Otherwise the result is ``default``
    together with the untouched input."""
    kind, colon, pat = pattern.partition(':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1095
1101
def _globre(pat):
    r'''Translate an extended glob string into a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    pos, end = 0, len(pat)
    parts = []  # regexp fragments, joined once at the end
    depth = 0   # number of '{' groups currently open
    escape = util.stringutil.regexbytesescapemap.get

    def peek():
        # the next unread character, or False once the pattern is exhausted
        return pos < end and pat[pos:pos + 1]

    while pos < end:
        ch = pat[pos:pos + 1]
        pos += 1
        if ch not in '*?[{},\\':
            parts.append(escape(ch, ch))
        elif ch == '?':
            parts.append('.')
        elif ch == '*':
            if peek() != '*':
                # a lone '*' never crosses a directory separator
                parts.append('[^/]*')
            else:
                pos += 1
                if peek() == '/':
                    # '**/' also matches zero directories
                    pos += 1
                    parts.append('(?:.*/)?')
                else:
                    parts.append('.*')
        elif ch == '[':
            close = pos
            # a leading '!' or ']' belongs to the class, it does not end it
            if close < end and pat[close:close + 1] in '!]':
                close += 1
            while close < end and pat[close:close + 1] != ']':
                close += 1
            if close >= end:
                # unterminated class: treat the '[' as a literal
                parts.append('\\[')
            else:
                stuff = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                lead = stuff[0:1]
                if lead == '!':
                    stuff = '^' + stuff[1:]
                elif lead == '^':
                    stuff = '\\' + stuff
                parts.append('[%s]' % stuff)
        elif ch == '{':
            depth += 1
            parts.append('(?:')
        elif ch == '}' and depth:
            parts.append(')')
            depth -= 1
        elif ch == ',' and depth:
            parts.append('|')
        elif ch == '\\':
            following = peek()
            if following:
                pos += 1
                parts.append(escape(following, following))
            else:
                parts.append(escape(ch, ch))
        else:
            # '}' or ',' outside of any group
            parts.append(escape(ch, ch))
    return ''.join(parts)
1176
1182
def _regex(kind, pat, globsuffix):
    '''Turn a (normalized) pattern of the given kind into a regexp string.

    globsuffix is appended to the regexp of globs. The Rust implementation
    is used when the rustext module is available. Kinds that cannot be
    expressed as a regexp raise error.ProgrammingError.'''

    if rustext is not None:
        try:
            return rustext.filepatterns.build_single_regex(kind, pat, globsuffix)
        except rustext.filepatterns.PatternError:
            raise error.ProgrammingError(
                'not a regex pattern: %s:%s' % (kind, pat)
            )

    if not pat:
        return ''
    if kind == 're':
        return pat
    if kind == 'relre':
        return pat if pat.startswith('^') else '.*' + pat
    if kind in ('glob', 'rootglob'):
        return _globre(pat) + globsuffix
    if kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix

    reescape = util.stringutil.reescape
    if kind in ('path', 'relpath'):
        return '' if pat == '.' else reescape(pat) + '(?:/|$)'
    if kind == 'rootfilesin':
        # Pattern is a directory name; '.' needs no directory part.
        escaped = '' if pat == '.' else reescape(pat) + '/'
        # Anything after the pattern must be a non-directory.
        return escaped + '[^/]+$'
    raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1205
1225
def _buildmatch(kindpats, globsuffix, root):
    '''Build a (regexp string, matcher function) pair for kindpats.

    globsuffix is appended to the regexp of globs. Subincludes are split
    off and matched by their own, lazily created matchers.'''
    subincludes, kindpats = _expandsubinclude(kindpats, root)
    matchfuncs = []

    if subincludes:
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    # create each sub-matcher on first use and keep it
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch
                if submatch(f[len(prefix):]):
                    return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = ''
    if kindpats:
        if not all(k == 'rootfilesin' for k, p, s in kindpats):
            regex, matchregex = _buildregexmatch(kindpats, globsuffix)
            matchfuncs.append(matchregex)
        else:
            # only 'rootfilesin' patterns: a set lookup on the parent
            # directory replaces the regexp
            dirs = {p for k, p, s in kindpats}

            def matchrootfilesin(f):
                parent, sep, unused = f.rpartition('/')
                return (parent if sep else '.') in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            matchfuncs.append(matchrootfilesin)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
1248
1268
# Size limit used by _buildregexmatch(): a single pattern whose regexp is
# longer than this aborts, and joined regexps are split into several groups
# so that no group grows beyond it.
MAX_RE_SIZE = 20000
1250
1270
1251 def _joinregexes(regexps):
1271 def _joinregexes(regexps):
1252 """gather multiple regular expressions into a single one"""
1272 """gather multiple regular expressions into a single one"""
1253 return '|'.join(regexps)
1273 return '|'.join(regexps)
1254
1274
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of (kind, pat, source) tuples.

    Returns the joined regexp string and a matcher function. When the
    joined regexp would exceed MAX_RE_SIZE, the patterns are compiled in
    several groups and the matcher tries each group in turn.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        pieces = [_regex(kind, pat, globsuffix)
                  for kind, pat, src in kindpats]
        fullregexp = _joinregexes(pieces)

        chunks = []
        chunkstart = 0
        chunksize = 0
        for pos, piece in enumerate(pieces):
            size = len(piece)
            if size > MAX_RE_SIZE:
                msg = _("matcher pattern is too long (%d bytes)") % size
                raise error.Abort(msg)
            if chunksize + size > MAX_RE_SIZE:
                # close the current group right before this piece
                chunks.append(_joinregexes(pieces[chunkstart:pos]))
                chunkstart = pos
                chunksize = 0
            # the extra 1 accounts for the '|' joining the pieces
            chunksize += size + 1

        if chunkstart == 0:
            # everything fits in a single regexp
            matcher = _rematcher(fullregexp)

            def func(s):
                return bool(matcher(s))
        else:
            chunks.append(_joinregexes(pieces[chunkstart:]))
            allmatchers = [_rematcher(chunk) for chunk in chunks]

            def func(s):
                return any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # compile the patterns one by one to name the offending one
        for kind, pat, src in kindpats:
            try:
                _rematcher(_regex(kind, pat, globsuffix))
            except re.error:
                if not src:
                    raise error.Abort(_("invalid pattern (%s): %s")
                                      % (kind, pat))
                raise error.Abort(_("%s: invalid pattern (%s): %s")
                                  % (src, kind, pat))
        raise error.Abort(_("invalid pattern"))
1306
1326
1307 def _patternrootsanddirs(kindpats):
1327 def _patternrootsanddirs(kindpats):
1308 '''Returns roots and directories corresponding to each pattern.
1328 '''Returns roots and directories corresponding to each pattern.
1309
1329
1310 This calculates the roots and directories exactly matching the patterns and
1330 This calculates the roots and directories exactly matching the patterns and
1311 returns a tuple of (roots, dirs) for each. It does not return other
1331 returns a tuple of (roots, dirs) for each. It does not return other
1312 directories which may also need to be considered, like the parent
1332 directories which may also need to be considered, like the parent
1313 directories.
1333 directories.
1314 '''
1334 '''
1315 r = []
1335 r = []
1316 d = []
1336 d = []
1317 for kind, pat, source in kindpats:
1337 for kind, pat, source in kindpats:
1318 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1338 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1319 root = []
1339 root = []
1320 for p in pat.split('/'):
1340 for p in pat.split('/'):
1321 if '[' in p or '{' in p or '*' in p or '?' in p:
1341 if '[' in p or '{' in p or '*' in p or '?' in p:
1322 break
1342 break
1323 root.append(p)
1343 root.append(p)
1324 r.append('/'.join(root) or '.')
1344 r.append('/'.join(root) or '.')
1325 elif kind in ('relpath', 'path'):
1345 elif kind in ('relpath', 'path'):
1326 r.append(pat or '.')
1346 r.append(pat or '.')
1327 elif kind in ('rootfilesin',):
1347 elif kind in ('rootfilesin',):
1328 d.append(pat or '.')
1348 d.append(pat or '.')
1329 else: # relglob, re, relre
1349 else: # relglob, re, relre
1330 r.append('.')
1350 r.append('.')
1331 return r, d
1351 return r, d
1332
1352
def _roots(kindpats):
    '''Return the root directories to match recursively for the patterns.'''
    return _patternrootsanddirs(kindpats)[0]
1337
1357
def _rootsdirsandparents(kindpats):
    '''Compute roots, exact directories and their parents from patterns.

    `roots` are directories to match recursively, `dirs` are matched
    non-recursively, and `parents` are the directories that have to be
    walked to reach anything in roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], [], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.'], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], [], ['p', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], [], ['.'])
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # The parents are treated as non-recursive/exact directories: they have
    # to be scanned to get to the roots or to the other exact directories.
    parents = list(util.dirs(dirs))
    parents.extend(util.dirs(roots))
    # util.dirs() does not include the root directory, so add it manually
    parents.append('.')

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
1378
1398
def _explicitfiles(kindpats):
    '''Return the filenames that the patterns may name explicitly.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only name directories, never files, so patterns of
    # that kind are left out.
    filable = [kp for kp in kindpats if kp[0] != 'rootfilesin']
    return _roots(filable)
1391
1411
1392 def _prefix(kindpats):
1412 def _prefix(kindpats):
1393 '''Whether all the patterns match a prefix (i.e. recursively)'''
1413 '''Whether all the patterns match a prefix (i.e. recursively)'''
1394 for kind, pat, source in kindpats:
1414 for kind, pat, source in kindpats:
1395 if kind not in ('path', 'relpath'):
1415 if kind not in ('path', 'relpath'):
1396 return False
1416 return False
1397 return True
1417 return True
1398
1418
# Compiled regexp used by readpatternfile() to strip comments; it is created
# on first use and kept for later calls.
_commentre = None
1400
1420
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    filepath is the file to read. warn is either a false value or a
    callable that is handed a message for every unknown "syntax:" line;
    such lines are skipped.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.
    '''
    global _commentre

    if rustext is not None:
        result, warnings = rustext.filepatterns.read_pattern_file(
            filepath,
            bool(warn),
            sourceinfo,
        )

        for warning_params in warnings:
            # Can't be easily emitted from Rust, because it would require
            # a mechanism for both gettext and calling the `warn` function.
            warn(_("%s: ignoring invalid syntax '%s'\n") % warning_params)

        return result

    syntaxes = {
        're': 'relre:',
        'regexp': 'relre:',
        'glob': 'relglob:',
        'rootglob': 'rootglob:',
        'include': 'include',
        'subinclude': 'subinclude',
    }
    syntax = 'relre:'
    patterns = []

    # The context manager closes the file even when parsing raises.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # an explicit per-line prefix overrides the current default
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
@@ -1,200 +1,205 b''
1 # common patterns in tests that can safely be replaced
1 # common patterns in tests that can safely be replaced
2 from __future__ import absolute_import
2 from __future__ import absolute_import
3
3
4 import os
4 import os
5
5
6 substitutions = [
6 substitutions = [
7 # list of possible compressions
7 # list of possible compressions
8 (br'(zstd,)?zlib,none,bzip2',
8 (br'(zstd,)?zlib,none,bzip2',
9 br'$USUAL_COMPRESSIONS$'
9 br'$USUAL_COMPRESSIONS$'
10 ),
10 ),
11 (br'=(zstd,)?zlib',
11 (br'=(zstd,)?zlib',
12 br'=$BUNDLE2_COMPRESSIONS$'
12 br'=$BUNDLE2_COMPRESSIONS$'
13 ),
13 ),
14 # capabilities sent through http
14 # capabilities sent through http
15 (br'bundlecaps=HG20%2Cbundle2%3DHG20%250A'
15 (br'bundlecaps=HG20%2Cbundle2%3DHG20%250A'
16 br'bookmarks%250A'
16 br'bookmarks%250A'
17 br'changegroup%253D01%252C02%250A'
17 br'changegroup%253D01%252C02%250A'
18 br'digests%253Dmd5%252Csha1%252Csha512%250A'
18 br'digests%253Dmd5%252Csha1%252Csha512%250A'
19 br'error%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250A'
19 br'error%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250A'
20 br'hgtagsfnodes%250A'
20 br'hgtagsfnodes%250A'
21 br'listkeys%250A'
21 br'listkeys%250A'
22 br'phases%253Dheads%250A'
22 br'phases%253Dheads%250A'
23 br'pushkey%250A'
23 br'pushkey%250A'
24 br'remote-changegroup%253Dhttp%252Chttps%250A'
24 br'remote-changegroup%253Dhttp%252Chttps%250A'
25 br'rev-branch-cache%250A'
25 br'rev-branch-cache%250A'
26 br'stream%253Dv2',
26 br'stream%253Dv2',
27 # (the replacement patterns)
27 # (the replacement patterns)
28 br'$USUAL_BUNDLE_CAPS$'
28 br'$USUAL_BUNDLE_CAPS$'
29 ),
29 ),
30 (br'bundlecaps=HG20%2Cbundle2%3DHG20%250A'
30 (br'bundlecaps=HG20%2Cbundle2%3DHG20%250A'
31 br'bookmarks%250A'
31 br'bookmarks%250A'
32 br'changegroup%253D01%252C02%250A'
32 br'changegroup%253D01%252C02%250A'
33 br'digests%253Dmd5%252Csha1%252Csha512%250A'
33 br'digests%253Dmd5%252Csha1%252Csha512%250A'
34 br'error%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250A'
34 br'error%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250A'
35 br'hgtagsfnodes%250A'
35 br'hgtagsfnodes%250A'
36 br'listkeys%250A'
36 br'listkeys%250A'
37 br'phases%253Dheads%250A'
37 br'phases%253Dheads%250A'
38 br'pushkey%250A'
38 br'pushkey%250A'
39 br'remote-changegroup%253Dhttp%252Chttps',
39 br'remote-changegroup%253Dhttp%252Chttps',
40 # (the replacement patterns)
40 # (the replacement patterns)
41 br'$USUAL_BUNDLE_CAPS_SERVER$'
41 br'$USUAL_BUNDLE_CAPS_SERVER$'
42 ),
42 ),
43 # bundle2 capabilities sent through ssh
43 # bundle2 capabilities sent through ssh
44 (br'bundle2=HG20%0A'
44 (br'bundle2=HG20%0A'
45 br'bookmarks%0A'
45 br'bookmarks%0A'
46 br'changegroup%3D01%2C02%0A'
46 br'changegroup%3D01%2C02%0A'
47 br'digests%3Dmd5%2Csha1%2Csha512%0A'
47 br'digests%3Dmd5%2Csha1%2Csha512%0A'
48 br'error%3Dabort%2Cunsupportedcontent%2Cpushraced%2Cpushkey%0A'
48 br'error%3Dabort%2Cunsupportedcontent%2Cpushraced%2Cpushkey%0A'
49 br'hgtagsfnodes%0A'
49 br'hgtagsfnodes%0A'
50 br'listkeys%0A'
50 br'listkeys%0A'
51 br'phases%3Dheads%0A'
51 br'phases%3Dheads%0A'
52 br'pushkey%0A'
52 br'pushkey%0A'
53 br'remote-changegroup%3Dhttp%2Chttps%0A'
53 br'remote-changegroup%3Dhttp%2Chttps%0A'
54 br'rev-branch-cache%0A'
54 br'rev-branch-cache%0A'
55 br'stream%3Dv2',
55 br'stream%3Dv2',
56 # (replacement patterns)
56 # (replacement patterns)
57 br'$USUAL_BUNDLE2_CAPS$'
57 br'$USUAL_BUNDLE2_CAPS$'
58 ),
58 ),
59 # bundle2 capabilities advertised by the server
59 # bundle2 capabilities advertised by the server
60 (br'bundle2=HG20%0A'
60 (br'bundle2=HG20%0A'
61 br'bookmarks%0A'
61 br'bookmarks%0A'
62 br'changegroup%3D01%2C02%0A'
62 br'changegroup%3D01%2C02%0A'
63 br'digests%3Dmd5%2Csha1%2Csha512%0A'
63 br'digests%3Dmd5%2Csha1%2Csha512%0A'
64 br'error%3Dabort%2Cunsupportedcontent%2Cpushraced%2Cpushkey%0A'
64 br'error%3Dabort%2Cunsupportedcontent%2Cpushraced%2Cpushkey%0A'
65 br'hgtagsfnodes%0A'
65 br'hgtagsfnodes%0A'
66 br'listkeys%0A'
66 br'listkeys%0A'
67 br'phases%3Dheads%0A'
67 br'phases%3Dheads%0A'
68 br'pushkey%0A'
68 br'pushkey%0A'
69 br'remote-changegroup%3Dhttp%2Chttps%0A'
69 br'remote-changegroup%3Dhttp%2Chttps%0A'
70 br'rev-branch-cache',
70 br'rev-branch-cache',
71 # (replacement patterns)
71 # (replacement patterns)
72 br'$USUAL_BUNDLE2_CAPS_SERVER$'
72 br'$USUAL_BUNDLE2_CAPS_SERVER$'
73 ),
73 ),
74 (
74 (
75 br'bundle2=HG20%0A'
75 br'bundle2=HG20%0A'
76 br'bookmarks%0A'
76 br'bookmarks%0A'
77 br'changegroup%3D01%2C02%0A'
77 br'changegroup%3D01%2C02%0A'
78 br'digests%3Dmd5%2Csha1%2Csha512%0A'
78 br'digests%3Dmd5%2Csha1%2Csha512%0A'
79 br'error%3Dabort%2Cunsupportedcontent%2Cpushraced%2Cpushkey%0A'
79 br'error%3Dabort%2Cunsupportedcontent%2Cpushraced%2Cpushkey%0A'
80 br'hgtagsfnodes%0A'
80 br'hgtagsfnodes%0A'
81 br'listkeys%0A'
81 br'listkeys%0A'
82 br'pushkey%0A'
82 br'pushkey%0A'
83 br'remote-changegroup%3Dhttp%2Chttps%0A'
83 br'remote-changegroup%3Dhttp%2Chttps%0A'
84 br'rev-branch-cache%0A'
84 br'rev-branch-cache%0A'
85 br'stream%3Dv2',
85 br'stream%3Dv2',
86 # (replacement patterns)
86 # (replacement patterns)
87 br'$USUAL_BUNDLE2_CAPS_NO_PHASES$'
87 br'$USUAL_BUNDLE2_CAPS_NO_PHASES$'
88 ),
88 ),
89 # HTTP access log dates
89 # HTTP access log dates
90 (br' - - \[\d\d/.../2\d\d\d \d\d:\d\d:\d\d] "(GET|PUT|POST)',
90 (br' - - \[\d\d/.../2\d\d\d \d\d:\d\d:\d\d] "(GET|PUT|POST)',
91 lambda m: br' - - [$LOGDATE$] "' + m.group(1)
91 lambda m: br' - - [$LOGDATE$] "' + m.group(1)
92 ),
92 ),
93 # HTTP error log dates
93 # HTTP error log dates
94 (br' - - \[\d\d/.../2\d\d\d \d\d:\d\d:\d\d] (HG error:|Exception)',
94 (br' - - \[\d\d/.../2\d\d\d \d\d:\d\d:\d\d] (HG error:|Exception)',
95 lambda m: br' - - [$ERRDATE$] ' + m.group(1)
95 lambda m: br' - - [$ERRDATE$] ' + m.group(1)
96 ),
96 ),
97 # HTTP header dates - RFC 1123
97 # HTTP header dates - RFC 1123
98 (br'([Dd]ate): [A-Za-z]{3}, \d\d [A-Za-z]{3} \d{4} \d\d:\d\d:\d\d GMT',
98 (br'([Dd]ate): [A-Za-z]{3}, \d\d [A-Za-z]{3} \d{4} \d\d:\d\d:\d\d GMT',
99 lambda m: br'%s: $HTTP_DATE$' % m.group(1)
99 lambda m: br'%s: $HTTP_DATE$' % m.group(1)
100 ),
100 ),
101 # LFS expiration value
101 # LFS expiration value
102 (br'"expires_at": "\d{4}-\d\d-\d\dT\d\d:\d\d:\d\dZ"',
102 (br'"expires_at": "\d{4}-\d\d-\d\dT\d\d:\d\d:\d\dZ"',
103 br'"expires_at": "$ISO_8601_DATE_TIME$"'
103 br'"expires_at": "$ISO_8601_DATE_TIME$"'
104 ),
104 ),
105 # Windows has an extra '/' in the following lines that get globbed away:
105 # Windows has an extra '/' in the following lines that get globbed away:
106 # pushing to file:/*/$TESTTMP/r2 (glob)
106 # pushing to file:/*/$TESTTMP/r2 (glob)
107 # comparing with file:/*/$TESTTMP/r2 (glob)
107 # comparing with file:/*/$TESTTMP/r2 (glob)
108 # sub/maybelarge.dat: largefile 34..9c not available from
108 # sub/maybelarge.dat: largefile 34..9c not available from
109 # file:/*/$TESTTMP/largefiles-repo (glob)
109 # file:/*/$TESTTMP/largefiles-repo (glob)
110 (br'(.*file:/)/?(/\$TESTTMP.*)',
110 (br'(.*file:/)/?(/\$TESTTMP.*)',
111 lambda m: m.group(1) + b'*' + m.group(2) + b' (glob)'
111 lambda m: m.group(1) + b'*' + m.group(2) + b' (glob)'
112 ),
112 ),
113 ]
113 ]
114
114
115 # Various platform error strings, keyed on a common replacement string
115 # Various platform error strings, keyed on a common replacement string
116 _errors = {
116 _errors = {
117 br'$ENOENT$': (
117 br'$ENOENT$': (
118 # IOError in Python does not have the same error message
119 # as in Rust, and automatic conversion is not possible
120 # because of module member privacy.
121 br'No such file or directory \(os error 2\)',
122
118 # strerror()
123 # strerror()
119 br'No such file or directory',
124 br'No such file or directory',
120
125
121 # FormatMessage(ERROR_FILE_NOT_FOUND)
126 # FormatMessage(ERROR_FILE_NOT_FOUND)
122 br'The system cannot find the file specified',
127 br'The system cannot find the file specified',
123 ),
128 ),
124 br'$ENOTDIR$': (
129 br'$ENOTDIR$': (
125 # strerror()
130 # strerror()
126 br'Not a directory',
131 br'Not a directory',
127
132
128 # FormatMessage(ERROR_PATH_NOT_FOUND)
133 # FormatMessage(ERROR_PATH_NOT_FOUND)
129 br'The system cannot find the path specified',
134 br'The system cannot find the path specified',
130 ),
135 ),
131 br'$ECONNRESET$': (
136 br'$ECONNRESET$': (
132 # strerror()
137 # strerror()
133 br'Connection reset by peer',
138 br'Connection reset by peer',
134
139
135 # FormatMessage(WSAECONNRESET)
140 # FormatMessage(WSAECONNRESET)
136 br'An existing connection was forcibly closed by the remote host',
141 br'An existing connection was forcibly closed by the remote host',
137 ),
142 ),
138 br'$EADDRINUSE$': (
143 br'$EADDRINUSE$': (
139 # strerror()
144 # strerror()
140 br'Address already in use',
145 br'Address already in use',
141
146
142 # FormatMessage(WSAEADDRINUSE)
147 # FormatMessage(WSAEADDRINUSE)
143 br'Only one usage of each socket address'
148 br'Only one usage of each socket address'
144 br' \(protocol/network address/port\) is normally permitted',
149 br' \(protocol/network address/port\) is normally permitted',
145 ),
150 ),
146 br'$EADDRNOTAVAIL$': (
151 br'$EADDRNOTAVAIL$': (
147 # strerror()
152 # strerror()
148 br'Cannot assign requested address',
153 br'Cannot assign requested address',
149
154
150 # FormatMessage(WSAEADDRNOTAVAIL)
155 # FormatMessage(WSAEADDRNOTAVAIL)
151 )
156 )
152 }
157 }
153
158
154 for replace, msgs in _errors.items():
159 for replace, msgs in _errors.items():
155 substitutions.extend((m, replace) for m in msgs)
160 substitutions.extend((m, replace) for m in msgs)
156
161
157 # Output lines on Windows that can be autocorrected for '\' vs '/' path
162 # Output lines on Windows that can be autocorrected for '\' vs '/' path
158 # differences.
163 # differences.
159 _winpathfixes = [
164 _winpathfixes = [
160 # cloning subrepo s\ss from $TESTTMP/t/s/ss
165 # cloning subrepo s\ss from $TESTTMP/t/s/ss
161 # cloning subrepo foo\bar from http://localhost:$HGPORT/foo/bar
166 # cloning subrepo foo\bar from http://localhost:$HGPORT/foo/bar
162 br'(?m)^cloning subrepo \S+\\.*',
167 br'(?m)^cloning subrepo \S+\\.*',
163
168
164 # pulling from $TESTTMP\issue1852a
169 # pulling from $TESTTMP\issue1852a
165 br'(?m)^pulling from \$TESTTMP\\.*',
170 br'(?m)^pulling from \$TESTTMP\\.*',
166
171
167 # pushing to $TESTTMP\a
172 # pushing to $TESTTMP\a
168 br'(?m)^pushing to \$TESTTMP\\.*',
173 br'(?m)^pushing to \$TESTTMP\\.*',
169
174
170 # pushing subrepo s\ss to $TESTTMP/t/s/ss
175 # pushing subrepo s\ss to $TESTTMP/t/s/ss
171 br'(?m)^pushing subrepo \S+\\\S+ to.*',
176 br'(?m)^pushing subrepo \S+\\\S+ to.*',
172
177
173 # moving d1\d11\a1 to d3/d11/a1
178 # moving d1\d11\a1 to d3/d11/a1
174 br'(?m)^moving \S+\\.*',
179 br'(?m)^moving \S+\\.*',
175
180
176 # d1\a: not recording move - dummy does not exist
181 # d1\a: not recording move - dummy does not exist
177 br'\S+\\\S+: not recording move .+',
182 br'\S+\\\S+: not recording move .+',
178
183
179 # reverting s\a
184 # reverting s\a
180 br'(?m)^reverting (?!subrepo ).*\\.*',
185 br'(?m)^reverting (?!subrepo ).*\\.*',
181
186
182 # saved backup bundle to
187 # saved backup bundle to
183 # $TESTTMP\test\.hg\strip-backup/443431ffac4f-2fc5398a-backup.hg
188 # $TESTTMP\test\.hg\strip-backup/443431ffac4f-2fc5398a-backup.hg
184 br'(?m)^saved backup bundle to \$TESTTMP.*\.hg',
189 br'(?m)^saved backup bundle to \$TESTTMP.*\.hg',
185
190
186 # no changes made to subrepo s\ss since last push to ../tcc/s/ss
191 # no changes made to subrepo s\ss since last push to ../tcc/s/ss
187 br'(?m)^no changes made to subrepo \S+\\\S+ since.*',
192 br'(?m)^no changes made to subrepo \S+\\\S+ since.*',
188
193
189 # changeset 5:9cc5aa7204f0: stuff/maybelarge.dat references missing
194 # changeset 5:9cc5aa7204f0: stuff/maybelarge.dat references missing
190 # $TESTTMP\largefiles-repo-hg\.hg\largefiles\76..38
195 # $TESTTMP\largefiles-repo-hg\.hg\largefiles\76..38
191 br'(?m)^changeset .* references (corrupted|missing) \$TESTTMP\\.*',
196 br'(?m)^changeset .* references (corrupted|missing) \$TESTTMP\\.*',
192
197
193 # stuff/maybelarge.dat: largefile 76..38 not available from
198 # stuff/maybelarge.dat: largefile 76..38 not available from
194 # file:/*/$TESTTMP\largefiles-repo (glob)
199 # file:/*/$TESTTMP\largefiles-repo (glob)
195 br'.*: largefile \S+ not available from file:/\*/.+',
200 br'.*: largefile \S+ not available from file:/\*/.+',
196 ]
201 ]
197
202
198 if os.name == 'nt':
203 if os.name == 'nt':
199 substitutions.extend([(s, lambda match: match.group().replace(b'\\', b'/'))
204 substitutions.extend([(s, lambda match: match.group().replace(b'\\', b'/'))
200 for s in _winpathfixes])
205 for s in _winpathfixes])
General Comments 0
You need to be logged in to leave comments. Login now