##// END OF EJS Templates
match: de-flake test-doctest.py by not depending on util.dirs() order...
Martin von Zweigbergk -
r42938:c4b8f863 default
parent child Browse files
Show More
@@ -1,1526 +1,1526
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import copy
10 import copy
11 import itertools
11 import itertools
12 import os
12 import os
13 import re
13 import re
14
14
15 from .i18n import _
15 from .i18n import _
16 from . import (
16 from . import (
17 encoding,
17 encoding,
18 error,
18 error,
19 pathutil,
19 pathutil,
20 pycompat,
20 pycompat,
21 util,
21 util,
22 )
22 )
23 from .utils import (
23 from .utils import (
24 stringutil,
24 stringutil,
25 )
25 )
26
26
# rustext is optional: when it cannot be imported the module-level name is
# set to None instead.
try:
    from . import rustext
    rustext.__name__ # force actual import (see hgdemandimport)
except ImportError:
    rustext = None

# Names of the pattern kinds (the part before ':' in 'kind:pattern').
# NOTE(review): judging by the name this is meant to be the complete list --
# confirm against the pattern-splitting code.
allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                   'rootglob',
                   'listfile', 'listfile0', 'set', 'include', 'subinclude',
                   'rootfilesin')
# Kinds whose patterns _donormalize() canonicalizes relative to cwd (via
# pathutil.canonpath) rather than merely normalizing them.
cwdrelativepatternkinds = ('relpath', 'glob')

# Short local alias for util.propertycache; used as a decorator in this module.
propertycache = util.propertycache
40
40
def _rematcher(regex):
    '''Compile regex through util.re and return a matching callable.

    The compiled object's test_match method is preferred when it exists;
    otherwise the regular match method is returned.
    '''
    compiled = util.re.compile(regex)
    # slightly faster, provided by facebook's re2 bindings
    fastmatch = getattr(compiled, 'test_match', None)
    if fastmatch is not None:
        return fastmatch
    return compiled.match
50
50
51 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
51 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
52 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
52 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
53 matchers = []
53 matchers = []
54 other = []
54 other = []
55
55
56 for kind, pat, source in kindpats:
56 for kind, pat, source in kindpats:
57 if kind == 'set':
57 if kind == 'set':
58 if ctx is None:
58 if ctx is None:
59 raise error.ProgrammingError("fileset expression with no "
59 raise error.ProgrammingError("fileset expression with no "
60 "context")
60 "context")
61 matchers.append(ctx.matchfileset(pat, badfn=badfn))
61 matchers.append(ctx.matchfileset(pat, badfn=badfn))
62
62
63 if listsubrepos:
63 if listsubrepos:
64 for subpath in ctx.substate:
64 for subpath in ctx.substate:
65 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
65 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
66 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
66 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
67 matchers.append(pm)
67 matchers.append(pm)
68
68
69 continue
69 continue
70 other.append((kind, pat, source))
70 other.append((kind, pat, source))
71 return matchers, other
71 return matchers, other
72
72
73 def _expandsubinclude(kindpats, root):
73 def _expandsubinclude(kindpats, root):
74 '''Returns the list of subinclude matcher args and the kindpats without the
74 '''Returns the list of subinclude matcher args and the kindpats without the
75 subincludes in it.'''
75 subincludes in it.'''
76 relmatchers = []
76 relmatchers = []
77 other = []
77 other = []
78
78
79 for kind, pat, source in kindpats:
79 for kind, pat, source in kindpats:
80 if kind == 'subinclude':
80 if kind == 'subinclude':
81 sourceroot = pathutil.dirname(util.normpath(source))
81 sourceroot = pathutil.dirname(util.normpath(source))
82 pat = util.pconvert(pat)
82 pat = util.pconvert(pat)
83 path = pathutil.join(sourceroot, pat)
83 path = pathutil.join(sourceroot, pat)
84
84
85 newroot = pathutil.dirname(path)
85 newroot = pathutil.dirname(path)
86 matcherargs = (newroot, '', [], ['include:%s' % path])
86 matcherargs = (newroot, '', [], ['include:%s' % path])
87
87
88 prefix = pathutil.canonpath(root, root, newroot)
88 prefix = pathutil.canonpath(root, root, newroot)
89 if prefix:
89 if prefix:
90 prefix += '/'
90 prefix += '/'
91 relmatchers.append((prefix, matcherargs))
91 relmatchers.append((prefix, matcherargs))
92 else:
92 else:
93 other.append((kind, pat, source))
93 other.append((kind, pat, source))
94
94
95 return relmatchers, other
95 return relmatchers, other
96
96
97 def _kindpatsalwaysmatch(kindpats):
97 def _kindpatsalwaysmatch(kindpats):
98 """"Checks whether the kindspats match everything, as e.g.
98 """"Checks whether the kindspats match everything, as e.g.
99 'relpath:.' does.
99 'relpath:.' does.
100 """
100 """
101 for kind, pat, source in kindpats:
101 for kind, pat, source in kindpats:
102 if pat != '' or kind not in ['relpath', 'glob']:
102 if pat != '' or kind not in ['relpath', 'glob']:
103 return False
103 return False
104 return True
104 return True
105
105
def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    '''Build a single matcher for kindpats using matchercls.

    'set' patterns are expanded to fileset matchers by _expandsets(); the
    remaining patterns, if any, are handed to matchercls. The result is a
    nevermatcher when nothing was produced, the sole matcher when there is
    exactly one, and otherwise a unionmatcher over all of them (the
    matchercls instance first).
    '''
    filesetmatchers, plainkindpats = _expandsets(
        kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn)

    candidates = []
    if plainkindpats:
        candidates.append(matchercls(root, plainkindpats, badfn=badfn))
    candidates.extend(filesetmatchers)

    if not candidates:
        return nevermatcher(badfn=badfn)
    if len(candidates) == 1:
        return candidates[0]
    return unionmatcher(candidates)
121
121
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem
        (ctx is dereferenced in this mode, so it must be provided)

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Usually a patternmatcher is returned:
    >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> match(b'foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> match(b'foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    if icasefs:
        dirstate = ctx.repo().dirstate
        foldcase = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            normalized = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind not in ('re', 'relre'):  # regex can't be normalized
                    original = pat
                    pat = foldcase(original)

                    # Preserve the original to handle a case only rename.
                    if original != pat and original in dirstate:
                        normalized.append((kind, original, source))

                normalized.append((kind, pat, source))
            return normalized
    else:
        normalize = _donormalize

    if not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(patternmatcher, root, kindpats, ctx=ctx,
                                      listsubrepos=listsubrepos, badfn=badfn)

    if include:
        # include/exclude always default to 'glob' and never carry badfn
        includepats = normalize(include, 'glob', root, cwd, auditor, warn)
        includem = _buildkindpatsmatcher(includematcher, root, includepats,
                                         ctx=ctx, listsubrepos=listsubrepos,
                                         badfn=None)
        m = intersectmatchers(m, includem)
    if exclude:
        excludepats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        excludem = _buildkindpatsmatcher(includematcher, root, excludepats,
                                         ctx=ctx, listsubrepos=listsubrepos,
                                         badfn=None)
        m = differencematcher(m, excludem)
    return m
247
247
def exact(files, badfn=None):
    '''Return an exactmatcher built from files, with the optional badfn.'''
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
250
250
def always(badfn=None):
    '''Return an alwaysmatcher, with the optional badfn.'''
    matcher = alwaysmatcher(badfn)
    return matcher
253
253
def never(badfn=None):
    '''Return a nevermatcher, with the optional badfn.'''
    matcher = nevermatcher(badfn)
    return matcher
256
256
def badmatch(match, badfn):
    """Return a shallow copy of match whose bad attribute is badfn.

    The matcher passed in is left unmodified.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
264
264
265 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
265 def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
266 '''Convert 'kind:pat' from the patterns list to tuples with kind and
266 '''Convert 'kind:pat' from the patterns list to tuples with kind and
267 normalized and rooted patterns and with listfiles expanded.'''
267 normalized and rooted patterns and with listfiles expanded.'''
268 kindpats = []
268 kindpats = []
269 for kind, pat in [_patsplit(p, default) for p in patterns]:
269 for kind, pat in [_patsplit(p, default) for p in patterns]:
270 if kind in cwdrelativepatternkinds:
270 if kind in cwdrelativepatternkinds:
271 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
271 pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
272 elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
272 elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
273 pat = util.normpath(pat)
273 pat = util.normpath(pat)
274 elif kind in ('listfile', 'listfile0'):
274 elif kind in ('listfile', 'listfile0'):
275 try:
275 try:
276 files = util.readfile(pat)
276 files = util.readfile(pat)
277 if kind == 'listfile0':
277 if kind == 'listfile0':
278 files = files.split('\0')
278 files = files.split('\0')
279 else:
279 else:
280 files = files.splitlines()
280 files = files.splitlines()
281 files = [f for f in files if f]
281 files = [f for f in files if f]
282 except EnvironmentError:
282 except EnvironmentError:
283 raise error.Abort(_("unable to read file list (%s)") % pat)
283 raise error.Abort(_("unable to read file list (%s)") % pat)
284 for k, p, source in _donormalize(files, default, root, cwd,
284 for k, p, source in _donormalize(files, default, root, cwd,
285 auditor, warn):
285 auditor, warn):
286 kindpats.append((k, p, pat))
286 kindpats.append((k, p, pat))
287 continue
287 continue
288 elif kind == 'include':
288 elif kind == 'include':
289 try:
289 try:
290 fullpath = os.path.join(root, util.localpath(pat))
290 fullpath = os.path.join(root, util.localpath(pat))
291 includepats = readpatternfile(fullpath, warn)
291 includepats = readpatternfile(fullpath, warn)
292 for k, p, source in _donormalize(includepats, default,
292 for k, p, source in _donormalize(includepats, default,
293 root, cwd, auditor, warn):
293 root, cwd, auditor, warn):
294 kindpats.append((k, p, source or pat))
294 kindpats.append((k, p, source or pat))
295 except error.Abort as inst:
295 except error.Abort as inst:
296 raise error.Abort('%s: %s' % (pat, inst[0]))
296 raise error.Abort('%s: %s' % (pat, inst[0]))
297 except IOError as inst:
297 except IOError as inst:
298 if warn:
298 if warn:
299 warn(_("skipping unreadable pattern file '%s': %s\n") %
299 warn(_("skipping unreadable pattern file '%s': %s\n") %
300 (pat, stringutil.forcebytestr(inst.strerror)))
300 (pat, stringutil.forcebytestr(inst.strerror)))
301 continue
301 continue
302 # else: re or relre - which cannot be normalized
302 # else: re or relre - which cannot be normalized
303 kindpats.append((kind, pat, ''))
303 kindpats.append((kind, pat, ''))
304 return kindpats
304 return kindpats
305
305
class basematcher(object):
    '''Base class for matchers.

    The defaults defined here match nothing (matchfn() returns False), list
    no files, and ask for every directory to be visited.
    '''

    def __init__(self, badfn=None):
        # Only shadow the bad() method when a callback was actually given.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return 'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
424
424
class alwaysmatcher(basematcher):
    '''Matches everything.

    Every file matches, and both visitdir() and visitchildrenset() answer
    'all' for any directory.
    '''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<alwaysmatcher>'

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        return 'all'

    def visitchildrenset(self, dir):
        return 'all'
445
445
class nevermatcher(basematcher):
    '''Matches nothing.

    No directory needs visiting: visitdir() returns False and
    visitchildrenset() returns an empty set.
    '''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<nevermatcher>'

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()
471
471
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function

    predfn becomes the instance's matchfn. predrepr, when given, is what
    __repr__ shows for the predicate; otherwise the byte repr of predfn is
    used.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            # nothing usable from predrepr: fall back to the function itself
            described = pycompat.byterepr(self.matchfn)
        return '<predicatenmatcher pred=%s>' % described
485
485
def normalizerootdir(dir, funcname):
    """Translate the deprecated root spelling '.' into ''.

    Any other value of ``dir`` is returned untouched. When '.' is seen, a
    deprecation warning naming ``match.<funcname>()`` is issued first.
    """
    if dir != '.':
        return dir
    util.nouideprecwarn("match.%s() no longer accepts "
                        "'.', use '' instead." % funcname, '5.1')
    return ''
492
492
493
493
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """
    # The docstring above is a raw string: it contains '\.' sequences, which
    # are invalid escapes in a non-raw literal.

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, '$', root)

    @propertycache
    def _dirs(self):
        # Computed lazily and cached: the directories derived from the
        # explicit file set.
        return set(util.dirs(self._fileset))

    def visitdir(self, dir):
        """Tell whether ``dir`` should be visited.

        Returns 'all' when this is a prefix matcher and ``dir`` is one of
        its explicit files; otherwise a boolean that is true when ``dir`` is
        an explicit file, one of the derived directories, or lies below an
        explicit file.
        """
        dir = normalizerootdir(dir, 'visitdir')
        if self._prefix and dir in self._fileset:
            return 'all'
        return (dir in self._fileset or
                dir in self._dirs or
                any(parentdir in self._fileset
                    for parentdir in util.finddirs(dir)))

    def visitchildrenset(self, dir):
        """Express the visitdir() answer for ``dir`` as a children set.

        True becomes 'this', a false value becomes an empty set, and 'all'
        is passed through.
        """
        ret = self.visitdir(dir)
        if ret is True:
            return 'this'
        elif not ret:
            return set()
        assert ret == 'all'
        return 'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
562
562
563 # This is basically a reimplementation of util.dirs that stores the children
563 # This is basically a reimplementation of util.dirs that stores the children
564 # instead of just a count of them, plus a small optional optimization to avoid
564 # instead of just a count of them, plus a small optional optimization to avoid
565 # some directories we don't need.
565 # some directories we don't need.
566 class _dirchildren(object):
566 class _dirchildren(object):
567 def __init__(self, paths, onlyinclude=None):
567 def __init__(self, paths, onlyinclude=None):
568 self._dirs = {}
568 self._dirs = {}
569 self._onlyinclude = onlyinclude or []
569 self._onlyinclude = onlyinclude or []
570 addpath = self.addpath
570 addpath = self.addpath
571 for f in paths:
571 for f in paths:
572 addpath(f)
572 addpath(f)
573
573
574 def addpath(self, path):
574 def addpath(self, path):
575 if path == '':
575 if path == '':
576 return
576 return
577 dirs = self._dirs
577 dirs = self._dirs
578 findsplitdirs = _dirchildren._findsplitdirs
578 findsplitdirs = _dirchildren._findsplitdirs
579 for d, b in findsplitdirs(path):
579 for d, b in findsplitdirs(path):
580 if d not in self._onlyinclude:
580 if d not in self._onlyinclude:
581 continue
581 continue
582 dirs.setdefault(d, set()).add(b)
582 dirs.setdefault(d, set()).add(b)
583
583
584 @staticmethod
584 @staticmethod
585 def _findsplitdirs(path):
585 def _findsplitdirs(path):
586 # yields (dirname, basename) tuples, walking back to the root. This is
586 # yields (dirname, basename) tuples, walking back to the root. This is
587 # very similar to util.finddirs, except:
587 # very similar to util.finddirs, except:
588 # - produces a (dirname, basename) tuple, not just 'dirname'
588 # - produces a (dirname, basename) tuple, not just 'dirname'
589 # - includes root dir
589 # - includes root dir
590 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
590 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
591 # slash.
591 # slash.
592 oldpos = len(path)
592 oldpos = len(path)
593 pos = path.rfind('/')
593 pos = path.rfind('/')
594 while pos != -1:
594 while pos != -1:
595 yield path[:pos], path[pos + 1:oldpos]
595 yield path[:pos], path[pos + 1:oldpos]
596 oldpos = pos
596 oldpos = pos
597 pos = path.rfind('/', 0, pos)
597 pos = path.rfind('/', 0, pos)
598 yield '', path[:oldpos]
598 yield '', path[:oldpos]
599
599
600 def get(self, path):
600 def get(self, path):
601 return self._dirs.get(path, set())
601 return self._dirs.get(path, set())
602
602
class includematcher(basematcher):
    """Matcher built from include-style (kind, pat, source) patterns.

    Besides the match function, it keeps three groups of directories
    (roots, dirs and parents, described in __init__) that are used to answer
    visitdir() and visitchildrenset() queries.
    """

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        # NOTE(review): stored as returned, without copying; presumably
        # _rootsdirsandparents already hands back a set -- confirm.
        self._parents = parents

    def visitdir(self, dir):
        """Tell whether ``dir`` should be visited.

        Returns 'all' when this is a prefix matcher and ``dir`` is a root;
        otherwise a boolean that is true when ``dir`` is a root, a dir, a
        parent, or lies below a root.
        """
        dir = normalizerootdir(dir, 'visitdir')
        if self._prefix and dir in self._roots:
            return 'all'
        return (dir in self._roots or
                dir in self._dirs or
                dir in self._parents or
                any(parentdir in self._roots
                    for parentdir in util.finddirs(dir)))

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        return _dirchildren(
            itertools.chain(self._dirs, self._roots, self._parents),
            onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        """Tell which children of ``dir`` need to be visited.

        Returns 'all', 'this', or a (possibly empty) set of child names.
        """
        # Normalize the deprecated '.' root spelling the same way visitdir()
        # does, so both methods treat the root directory identically.
        dir = normalizerootdir(dir, 'visitchildrenset')
        if self._prefix and dir in self._roots:
            return 'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if ('' in self._roots or
            dir in self._roots or
            dir in self._dirs or
            any(parentdir in self._roots
                for parentdir in util.finddirs(dir))):
            return 'this'

        if dir in self._parents:
            return self._allparentschildren.get(dir) or set()
        return set()

    @encoding.strmethod
    def __repr__(self):
        return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
660
660
class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # A list is stored as given; any other iterable is copied into one.
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # Lazily computed set of the directories of the listed files.
        return set(util.dirs(self._fileset))

    def visitdir(self, dir):
        """Tell whether ``dir`` is a directory of one of the files."""
        dir = normalizerootdir(dir, 'visitdir')
        return dir in self._dirs

    def visitchildrenset(self, dir):
        """Return the names directly below ``dir`` that lead to a file.

        An empty set is returned when there are no files at all or when
        ``dir`` is not one of their directories.
        """
        dir = normalizerootdir(dir, 'visitchildrenset')

        if not self._fileset or dir not in self._dirs:
            return set()

        # Every file plus every directory, leaving out the root ('') itself.
        candidates = self._fileset | (self._dirs - {''})
        if dir != '':
            # Keep only the entries below dir, made relative to it.
            below = dir + '/'
            skip = len(below)
            candidates = set(c[skip:] for c in candidates
                             if c.startswith(below))
        # self._dirs holds every directory level: for foo/bar/baz.txt it
        # contains '', 'foo' and 'foo/bar'. A candidate that still has a '/'
        # in it therefore belongs to a deeper level and can be dropped; the
        # immediate subdirectory is present on its own without a slash.
        ret = {c for c in candidates if '/' not in c}
        # An empty result is not expected: it would mean that _dirs holds a
        # directory for which _fileset has no file.
        assert ret
        return ret

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
728
728
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        """Tell whether ``dir`` should be visited.

        Returns False when m2 covers all of ``dir``, m1's own answer
        (possibly 'all') when m2 has nothing in ``dir``, and otherwise m1's
        answer reduced to a plain boolean.
        """
        # Ask m2 only once; the answer is needed for both tests below.
        m2visit = self._m2.visitdir(dir)
        if m2visit == 'all':
            return False
        if not m2visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        """Tell which children of ``dir`` need to be visited.

        Returns 'all', 'this', or a (possibly empty) set of child names.
        """
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == 'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in ['all', 'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return 'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
797
797
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.
    '''
    # With a missing operand the result is simply whichever one was given.
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        # m1 imposes no restriction: a shallow copy of m2 is enough, but it
        # must carry m1's non-matching attributes.
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        combined = copy.copy(m2)
        combined.bad = m1.bad
        combined.explicitdir = m1.explicitdir
        combined.traversedir = m1.traversedir
        return combined

    if m2.always():
        # m2 imposes no restriction and its attributes are ignored anyway.
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
818
818
class intersectionmatcher(basematcher):
    """Matches what both wrapped matchers match.

    The non-matching attributes (bad, explicitdir, traversedir) are taken
    from the first matcher.
    """

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, the file sets cannot
            # easily be intersected because their files() are not always
            # files. For example, when intersecting a matcher
            # "-I glob:foo.txt" with a matcher of "path:dir2", "dir2" must
            # not be removed from the set.
            return self._m1.files() + self._m2.files()
        # Filter the files of an exact operand through the other operand.
        exact, other = self._m1, self._m2
        if not exact.isexact():
            exact, other = other, exact
        return [f for f in exact.files() if other(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        """Tell whether ``dir`` should be visited.

        'all' is returned only when both matchers say 'all'.
        """
        first = self._m1.visitdir(dir)
        if first == 'all':
            return self._m2.visitdir(dir)
        if not first:
            return False
        # Reduce to a boolean: True from m1 combined with 'all' from m2 must
        # not come out as 'all'.
        return bool(self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        """Tell which children of ``dir`` need to be visited.

        Returns 'all', 'this', or a (possibly empty) set of child names.
        """
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side leaves the other side's answer unchanged.
        if m1_set == 'all':
            return m2_set
        if m2_set == 'all':
            return m1_set

        if 'this' in (m1_set, m2_set):
            return 'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set.intersection(m2_set)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
879
879
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep the wrapped matcher's files that live below path, made
        # relative to it.
        below = path + "/"
        skip = len(below)
        self._files = [f[skip:] for f in matcher._files
                       if f.startswith(below)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def _parentdir(self, dir):
        # Map a directory relative to the subdirectory ('' being the
        # subdirectory itself) to the wrapped matcher's namespace.
        if dir == '':
            return self._path
        return self._path + "/" + dir

    def bad(self, f, msg):
        self._matcher.bad(self._path + "/" + f, msg)

    def matchfn(self, f):
        # The superclass constructor loses some information, so the match
        # function for the subdirectory cannot be rebuilt accurately from
        # the inputs. matchfn() and visitdir() are overridden instead and
        # call the wrapped matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        dir = normalizerootdir(dir, 'visitdir')
        return self._matcher.visitdir(self._parentdir(dir))

    def visitchildrenset(self, dir):
        dir = normalizerootdir(dir, 'visitchildrenset')
        return self._matcher.visitchildrenset(self._parentdir(dir))

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
957
957
958 class prefixdirmatcher(basematcher):
958 class prefixdirmatcher(basematcher):
959 """Adapt a matcher to work on a parent directory.
959 """Adapt a matcher to work on a parent directory.
960
960
961 The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
961 The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
962 ignored.
962 ignored.
963
963
964 The prefix path should usually be the relative path from the root of
964 The prefix path should usually be the relative path from the root of
965 this matcher to the root of the wrapped matcher.
965 this matcher to the root of the wrapped matcher.
966
966
967 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
967 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
968 >>> m2 = prefixdirmatcher(b'd/e', m1)
968 >>> m2 = prefixdirmatcher(b'd/e', m1)
969 >>> m2(b'a.txt')
969 >>> m2(b'a.txt')
970 False
970 False
971 >>> m2(b'd/e/a.txt')
971 >>> m2(b'd/e/a.txt')
972 True
972 True
973 >>> m2(b'd/e/b.txt')
973 >>> m2(b'd/e/b.txt')
974 False
974 False
975 >>> m2.files()
975 >>> m2.files()
976 ['d/e/a.txt', 'd/e/f/b.txt']
976 ['d/e/a.txt', 'd/e/f/b.txt']
977 >>> m2.exact(b'd/e/a.txt')
977 >>> m2.exact(b'd/e/a.txt')
978 True
978 True
979 >>> m2.visitdir(b'd')
979 >>> m2.visitdir(b'd')
980 True
980 True
981 >>> m2.visitdir(b'd/e')
981 >>> m2.visitdir(b'd/e')
982 True
982 True
983 >>> m2.visitdir(b'd/e/f')
983 >>> m2.visitdir(b'd/e/f')
984 True
984 True
985 >>> m2.visitdir(b'd/e/g')
985 >>> m2.visitdir(b'd/e/g')
986 False
986 False
987 >>> m2.visitdir(b'd/ef')
987 >>> m2.visitdir(b'd/ef')
988 False
988 False
989 """
989 """
990
990
991 def __init__(self, path, matcher, badfn=None):
991 def __init__(self, path, matcher, badfn=None):
992 super(prefixdirmatcher, self).__init__(badfn)
992 super(prefixdirmatcher, self).__init__(badfn)
993 if not path:
993 if not path:
994 raise error.ProgrammingError('prefix path must not be empty')
994 raise error.ProgrammingError('prefix path must not be empty')
995 self._path = path
995 self._path = path
996 self._pathprefix = path + '/'
996 self._pathprefix = path + '/'
997 self._matcher = matcher
997 self._matcher = matcher
998
998
999 @propertycache
999 @propertycache
1000 def _files(self):
1000 def _files(self):
1001 return [self._pathprefix + f for f in self._matcher._files]
1001 return [self._pathprefix + f for f in self._matcher._files]
1002
1002
1003 def matchfn(self, f):
1003 def matchfn(self, f):
1004 if not f.startswith(self._pathprefix):
1004 if not f.startswith(self._pathprefix):
1005 return False
1005 return False
1006 return self._matcher.matchfn(f[len(self._pathprefix):])
1006 return self._matcher.matchfn(f[len(self._pathprefix):])
1007
1007
1008 @propertycache
1008 @propertycache
1009 def _pathdirs(self):
1009 def _pathdirs(self):
1010 return set(util.finddirs(self._path))
1010 return set(util.finddirs(self._path))
1011
1011
1012 def visitdir(self, dir):
1012 def visitdir(self, dir):
1013 if dir == self._path:
1013 if dir == self._path:
1014 return self._matcher.visitdir('')
1014 return self._matcher.visitdir('')
1015 if dir.startswith(self._pathprefix):
1015 if dir.startswith(self._pathprefix):
1016 return self._matcher.visitdir(dir[len(self._pathprefix):])
1016 return self._matcher.visitdir(dir[len(self._pathprefix):])
1017 return dir in self._pathdirs
1017 return dir in self._pathdirs
1018
1018
1019 def visitchildrenset(self, dir):
1019 def visitchildrenset(self, dir):
1020 if dir == self._path:
1020 if dir == self._path:
1021 return self._matcher.visitchildrenset('')
1021 return self._matcher.visitchildrenset('')
1022 if dir.startswith(self._pathprefix):
1022 if dir.startswith(self._pathprefix):
1023 return self._matcher.visitchildrenset(dir[len(self._pathprefix):])
1023 return self._matcher.visitchildrenset(dir[len(self._pathprefix):])
1024 if dir in self._pathdirs:
1024 if dir in self._pathdirs:
1025 return 'this'
1025 return 'this'
1026 return set()
1026 return set()
1027
1027
1028 def isexact(self):
1028 def isexact(self):
1029 return self._matcher.isexact()
1029 return self._matcher.isexact()
1030
1030
1031 def prefix(self):
1031 def prefix(self):
1032 return self._matcher.prefix()
1032 return self._matcher.prefix()
1033
1033
1034 @encoding.strmethod
1034 @encoding.strmethod
1035 def __repr__(self):
1035 def __repr__(self):
1036 return ('<prefixdirmatcher path=%r, matcher=%r>'
1036 return ('<prefixdirmatcher path=%r, matcher=%r>'
1037 % (pycompat.bytestr(self._path), self._matcher))
1037 % (pycompat.bytestr(self._path), self._matcher))
1038
1038
class unionmatcher(basematcher):
    """Matcher that accepts a file as soon as any wrapped matcher does.

    ``explicitdir`` and ``traversedir`` are copied from the first matcher in
    the list; the list must therefore contain at least one matcher.
    """

    def __init__(self, matchers):
        m1 = matchers[0]
        super(unionmatcher, self).__init__()
        self._matchers = matchers
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        """Return True if at least one of the wrapped matchers matches f."""
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        """Combine the visitdir() answers of all wrapped matchers.

        An 'all' answer from any matcher is returned immediately; otherwise
        the individual answers are or-ed together.
        """
        combined = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == 'all':
                return answer
            combined |= answer
        return combined

    def visitchildrenset(self, dir):
        """Combine the visitchildrenset() answers of all wrapped matchers.

        'all' wins over everything, 'this' wins over explicit sets, and
        explicit sets are merged together.
        """
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == 'all':
                return answer
            if sawthis or answer == 'this':
                # keep looping: a later matcher may still answer 'all'
                sawthis = True
                continue
            assert isinstance(answer, set)
            children.update(answer)
        return 'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return ('<unionmatcher matchers=%r>' % self._matchers)
1090
1090
def patkind(pattern, default=None):
    r'''Return the kind of a 'kind:pat' pattern, or default.

    The kind is only recognized when it is one of the known pattern kinds;
    otherwise ``default`` is returned.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    kind, _pat = _patsplit(pattern, default)
    return kind
1105
1105
def _patsplit(pattern, default):
    """Separate an optional 'kind:' prefix from the pattern proper.

    Returns a (kind, pattern) pair. When the text before the first colon is
    not a known pattern kind (or there is no colon at all), the pair is
    (default, pattern) with the pattern left untouched."""
    kind, colon, rest = pattern.partition(':')
    if colon and kind in allpatternkinds:
        return kind, rest
    return default, pattern
1114
1114
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    The pattern is scanned one character at a time. Characters without a
    special glob meaning are passed through the regex escape map; the glob
    metacharacters are translated as shown in the examples below.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    # i is the read position in pat, n its length
    i, n = 0, len(pat)
    res = ''
    # nesting depth of currently open '{' groups
    group = 0
    escape = util.stringutil.regexbytesescapemap.get
    def peek():
        # next unread character, or False when the pattern is exhausted
        return i < n and pat[i:i + 1]
    while i < n:
        # slicing (rather than indexing) is used to pull out one character
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            # not a glob metacharacter: emit it, regex-escaped if needed
            res += escape(c, c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' matches any number of leading directories,
                    # including none
                    i += 1
                    res += '(?:.*/)?'
                else:
                    # '**' matches anything, including '/'
                    res += '.*'
            else:
                # a single '*' does not cross directory separators
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # look for the closing ']' of the character class; a '!' or ']'
            # directly after the '[' is part of the class, not its end
            j = i
            if j < n and pat[j:j + 1] in '!]':
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # no closing bracket: treat the '[' as a literal character
                res += '\\['
            else:
                # double backslashes so they stay literal inside the class
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    # glob negation '!' becomes regex negation '^'
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    # a leading '^' is literal in a glob, so escape it
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            # ',' only separates alternatives inside an open '{' group
            res += '|'
        elif c == '\\':
            p = peek()
            if p:
                # backslash escapes the next character: emit it literally
                i += 1
                res += escape(p, p)
            else:
                # trailing backslash: emit the backslash itself
                res += escape(c, c)
        else:
            # '}' or ',' outside of any group: plain character
            res += escape(c, c)
    return res
1195
1195
def _regex(kind, pat, globsuffix):
    '''Translate one (normalized) pattern of the given kind into a regular
    expression string.

    globsuffix is appended to the regexps generated from glob-style kinds.
    When the Rust extension is available the translation is delegated to it.
    Raises error.ProgrammingError for kinds that cannot be expressed as a
    regexp.'''

    if rustext is not None:
        filepatterns = rustext.filepatterns
        try:
            return filepatterns.build_single_regex(kind, pat, globsuffix)
        except filepatterns.PatternError:
            raise error.ProgrammingError(
                'not a regex pattern: %s:%s' % (kind, pat)
            )

    if kind in ('glob', 'relpath') and not pat:
        return ''
    if kind == 're':
        return pat
    if kind in ('path', 'relpath'):
        if pat == '.':
            return ''
        # match the path itself or anything below it
        return util.stringutil.reescape(pat) + '(?:/|$)'
    if kind == 'rootfilesin':
        # the pattern names a directory; '.' stands for the root
        dirprefix = ''
        if pat != '.':
            dirprefix = util.stringutil.reescape(pat) + '/'
        # only direct children (no further '/') may follow the directory
        return dirprefix + '[^/]+$'
    if kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix
    if kind == 'relre':
        # an explicitly anchored regexp is used as is
        return pat if pat.startswith('^') else '.*' + pat
    if kind in ('glob', 'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1238
1238
def _buildmatch(kindpats, globsuffix, root):
    '''Build a (regexp string, match function) pair for kindpats.

    globsuffix is appended to the regexp of globs. subinclude patterns are
    split off first and handled by lazily created sub-matchers; the remaining
    patterns are matched either by a directory-set lookup (when all of them
    are 'rootfilesin') or by a compiled regexp.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # prefix -> matcher, filled on first use
        submatchers = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatcher = submatchers.get(prefix)
                if submatcher is None:
                    submatcher = match(*matcherargs)
                    submatchers[prefix] = submatcher

                if submatcher(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    regex = ''
    if kindpats:
        onlyrootfilesin = all(k == 'rootfilesin' for k, p, s in kindpats)
        if onlyrootfilesin:
            dirs = {p for k, p, s in kindpats}
            def mf(f):
                # a file matches when its parent directory is listed;
                # files in the root have '.' as parent
                parent, slash, _name = f.rpartition('/')
                if not slash:
                    parent = '.'
                return parent in dirs
            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
1281
1281
# Upper bound on the length of a regexp handed to the regex compiler by
# _buildregexmatch(): a single pattern longer than this is rejected, and
# joined patterns are split into several groups that each stay below it.
MAX_RE_SIZE = 20000
1283
1283
1284 def _joinregexes(regexps):
1284 def _joinregexes(regexps):
1285 """gather multiple regular expressions into a single one"""
1285 """gather multiple regular expressions into a single one"""
1286 return '|'.join(regexps)
1286 return '|'.join(regexps)
1287
1287
def _buildregexmatch(kindpats, globsuffix):
    """Turn a list of (kind, pat, source) tuples into a match function.

    Returns a (regexp string, match function) pair. The returned regexp is
    always the full alternation of all patterns; for matching, the patterns
    are compiled in several groups when their combined size would exceed
    MAX_RE_SIZE. Aborts when a single pattern is larger than MAX_RE_SIZE or
    when a pattern is not a valid regular expression.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        groups = []
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        groupstart = 0
        groupsize = 0
        for idx, piece in enumerate(regexps):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                msg = _("matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            if (groupsize + piecesize) > MAX_RE_SIZE:
                # close the current group and start a new one at this piece
                groups.append(_joinregexes(regexps[groupstart:idx]))
                groupstart = idx
                groupsize = 0
            # the extra byte accounts for the '|' separator
            groupsize += piecesize + 1

        if groupstart == 0:
            # everything fits into a single regexp
            matcher = _rematcher(fullregexp)
            def func(s):
                return bool(matcher(s))
        else:
            groups.append(_joinregexes(regexps[groupstart:]))
            allmatchers = [_rematcher(g) for g in groups]
            def func(s):
                return any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # compile the patterns one by one to find out which one is broken
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if not s:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
                raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                  (s, k, p))
        raise error.Abort(_("invalid pattern"))
1339
1339
1340 def _patternrootsanddirs(kindpats):
1340 def _patternrootsanddirs(kindpats):
1341 '''Returns roots and directories corresponding to each pattern.
1341 '''Returns roots and directories corresponding to each pattern.
1342
1342
1343 This calculates the roots and directories exactly matching the patterns and
1343 This calculates the roots and directories exactly matching the patterns and
1344 returns a tuple of (roots, dirs) for each. It does not return other
1344 returns a tuple of (roots, dirs) for each. It does not return other
1345 directories which may also need to be considered, like the parent
1345 directories which may also need to be considered, like the parent
1346 directories.
1346 directories.
1347 '''
1347 '''
1348 r = []
1348 r = []
1349 d = []
1349 d = []
1350 for kind, pat, source in kindpats:
1350 for kind, pat, source in kindpats:
1351 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1351 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1352 root = []
1352 root = []
1353 for p in pat.split('/'):
1353 for p in pat.split('/'):
1354 if '[' in p or '{' in p or '*' in p or '?' in p:
1354 if '[' in p or '{' in p or '*' in p or '?' in p:
1355 break
1355 break
1356 root.append(p)
1356 root.append(p)
1357 r.append('/'.join(root))
1357 r.append('/'.join(root))
1358 elif kind in ('relpath', 'path'):
1358 elif kind in ('relpath', 'path'):
1359 if pat == '.':
1359 if pat == '.':
1360 pat = ''
1360 pat = ''
1361 r.append(pat)
1361 r.append(pat)
1362 elif kind in ('rootfilesin',):
1362 elif kind in ('rootfilesin',):
1363 if pat == '.':
1363 if pat == '.':
1364 pat = ''
1364 pat = ''
1365 d.append(pat)
1365 d.append(pat)
1366 else: # relglob, re, relre
1366 else: # relglob, re, relre
1367 r.append('')
1367 r.append('')
1368 return r, d
1368 return r, d
1369
1369
def _roots(kindpats):
    '''Return the directories that the given patterns match recursively.

    This is the ``roots`` half of _patternrootsanddirs(); the exact
    directories are discarded.'''
    return _patternrootsanddirs(kindpats)[0]
1374
1374
def _rootsdirsandparents(kindpats):
    '''Return the roots, exact directories and their parents for patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    The result is a (roots, dirs, parents) tuple; roots and dirs are lists,
    parents is a set.

    >>> _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', ''], [], set(['', 'g']))
    >>> _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', ''], set(['', 'g']))
    >>> _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', ''], [], set(['', 'p']))
    >>> _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['', '', ''], [], set(['']))
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # The parents have to be visited non-recursively: they must be scanned
    # to reach either the roots or the other exact directories.
    parents = set(util.dirs(exactdirs))
    parents.update(util.dirs(roots))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, exactdirs, parents
1413
1413
def _explicitfiles(kindpats):
    '''Return the names that the patterns may designate as explicit files.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only name directories, never files, so patterns of
    # that kind are dropped before computing the roots.
    filable = []
    for kindpat in kindpats:
        if kindpat[0] != 'rootfilesin':
            filable.append(kindpat)
    return _roots(filable)
1426
1426
1427 def _prefix(kindpats):
1427 def _prefix(kindpats):
1428 '''Whether all the patterns match a prefix (i.e. recursively)'''
1428 '''Whether all the patterns match a prefix (i.e. recursively)'''
1429 for kind, pat, source in kindpats:
1429 for kind, pat, source in kindpats:
1430 if kind not in ('path', 'relpath'):
1430 if kind not in ('path', 'relpath'):
1431 return False
1431 return False
1432 return True
1432 return True
1433
1433
# Lazily compiled regexp used by readpatternfile() to strip comments.
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    warn, when true, is called with a message for every unknown
    "syntax:" declaration; such declarations are otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.
    '''
    global _commentre

    if rustext is not None:
        result, warnings = rustext.filepatterns.read_pattern_file(
            filepath,
            bool(warn),
            sourceinfo,
        )

        # NOTE(review): warn is called without a guard here; presumably the
        # Rust side reports no warnings when bool(warn) is False -- confirm.
        for warning_params in warnings:
            # Can't be easily emitted from Rust, because it would require
            # a mechanism for both gettext and calling the `warn` function.
            warn(_("%s: ignoring invalid syntax '%s'\n") % warning_params)

        return result

    # maps the name usable after "syntax:" (or as a "name:" line prefix) to
    # the prefix put in front of the resulting pattern
    syntaxes = {
        're': 'relre:',
        'regexp': 'relre:',
        'glob': 'relglob:',
        'rootglob': 'rootglob:',
        'include': 'include',
        'subinclude': 'subinclude',
    }
    syntax = 'relre:'
    patterns = []

    # the context manager closes the file even when parsing raises
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a per-line prefix overrides the current default syntax
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now