##// END OF EJS Templates
match: stabilize _rootsdirsandparents doctest...
marmoute -
r42559:96fc696a default
parent child Browse files
Show More
@@ -1,1525 +1,1529
1 # match.py - filename matching
1 # match.py - filename matching
2 #
2 #
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import, print_function
8 from __future__ import absolute_import, print_function
9
9
10 import copy
10 import copy
11 import itertools
11 import itertools
12 import os
12 import os
13 import re
13 import re
14
14
15 from .i18n import _
15 from .i18n import _
16 from . import (
16 from . import (
17 encoding,
17 encoding,
18 error,
18 error,
19 pathutil,
19 pathutil,
20 pycompat,
20 pycompat,
21 util,
21 util,
22 )
22 )
23 from .utils import (
23 from .utils import (
24 stringutil,
24 stringutil,
25 )
25 )
26
26
27 try:
27 try:
28 from . import rustext
28 from . import rustext
29 rustext.__name__ # force actual import (see hgdemandimport)
29 rustext.__name__ # force actual import (see hgdemandimport)
30 except ImportError:
30 except ImportError:
31 rustext = None
31 rustext = None
32
32
# Pattern kind prefixes listed for this module.
allpatternkinds = (
    're',
    'glob',
    'path',
    'relglob',
    'relpath',
    'relre',
    'rootglob',
    'listfile',
    'listfile0',
    'set',
    'include',
    'subinclude',
    'rootfilesin',
)
# Kinds whose pattern _donormalize() canonicalizes relative to the cwd.
cwdrelativepatternkinds = ('relpath', 'glob')
38
38
39 propertycache = util.propertycache
39 propertycache = util.propertycache
40
40
def _rematcher(regex):
    '''Compile ``regex`` and return a matcher function for it.

    The pattern is compiled through ``util.re``. When the compiled object
    offers ``test_match`` (slightly faster, provided by facebook's re2
    bindings) that method is returned; otherwise the regular ``match``
    method of the compiled object is returned.
    '''
    compiled = util.re.compile(regex)
    try:
        matcher = compiled.test_match
    except AttributeError:
        matcher = compiled.match
    return matcher
50
50
51 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
51 def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
52 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
52 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
53 matchers = []
53 matchers = []
54 other = []
54 other = []
55
55
56 for kind, pat, source in kindpats:
56 for kind, pat, source in kindpats:
57 if kind == 'set':
57 if kind == 'set':
58 if ctx is None:
58 if ctx is None:
59 raise error.ProgrammingError("fileset expression with no "
59 raise error.ProgrammingError("fileset expression with no "
60 "context")
60 "context")
61 matchers.append(ctx.matchfileset(pat, badfn=badfn))
61 matchers.append(ctx.matchfileset(pat, badfn=badfn))
62
62
63 if listsubrepos:
63 if listsubrepos:
64 for subpath in ctx.substate:
64 for subpath in ctx.substate:
65 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
65 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
66 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
66 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
67 matchers.append(pm)
67 matchers.append(pm)
68
68
69 continue
69 continue
70 other.append((kind, pat, source))
70 other.append((kind, pat, source))
71 return matchers, other
71 return matchers, other
72
72
73 def _expandsubinclude(kindpats, root):
73 def _expandsubinclude(kindpats, root):
74 '''Returns the list of subinclude matcher args and the kindpats without the
74 '''Returns the list of subinclude matcher args and the kindpats without the
75 subincludes in it.'''
75 subincludes in it.'''
76 relmatchers = []
76 relmatchers = []
77 other = []
77 other = []
78
78
79 for kind, pat, source in kindpats:
79 for kind, pat, source in kindpats:
80 if kind == 'subinclude':
80 if kind == 'subinclude':
81 sourceroot = pathutil.dirname(util.normpath(source))
81 sourceroot = pathutil.dirname(util.normpath(source))
82 pat = util.pconvert(pat)
82 pat = util.pconvert(pat)
83 path = pathutil.join(sourceroot, pat)
83 path = pathutil.join(sourceroot, pat)
84
84
85 newroot = pathutil.dirname(path)
85 newroot = pathutil.dirname(path)
86 matcherargs = (newroot, '', [], ['include:%s' % path])
86 matcherargs = (newroot, '', [], ['include:%s' % path])
87
87
88 prefix = pathutil.canonpath(root, root, newroot)
88 prefix = pathutil.canonpath(root, root, newroot)
89 if prefix:
89 if prefix:
90 prefix += '/'
90 prefix += '/'
91 relmatchers.append((prefix, matcherargs))
91 relmatchers.append((prefix, matcherargs))
92 else:
92 else:
93 other.append((kind, pat, source))
93 other.append((kind, pat, source))
94
94
95 return relmatchers, other
95 return relmatchers, other
96
96
97 def _kindpatsalwaysmatch(kindpats):
97 def _kindpatsalwaysmatch(kindpats):
98 """"Checks whether the kindspats match everything, as e.g.
98 """"Checks whether the kindspats match everything, as e.g.
99 'relpath:.' does.
99 'relpath:.' does.
100 """
100 """
101 for kind, pat, source in kindpats:
101 for kind, pat, source in kindpats:
102 if pat != '' or kind not in ['relpath', 'glob']:
102 if pat != '' or kind not in ['relpath', 'glob']:
103 return False
103 return False
104 return True
104 return True
105
105
def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    '''Build a single matcher out of ``kindpats``.

    'set' patterns are expanded to fileset matchers through _expandsets();
    the remaining patterns, if any, are handed to ``matchercls``. The result
    is a nevermatcher when nothing was produced, the lone matcher when
    exactly one was produced, and a unionmatcher of all of them otherwise
    (the ``matchercls`` instance first, then the fileset matchers).
    '''
    filesetmatchers, plainkindpats = _expandsets(
        kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn)

    candidates = []
    if plainkindpats:
        candidates.append(matchercls(root, plainkindpats, badfn=badfn))
    candidates.extend(filesetmatchers)

    if not candidates:
        return nevermatcher(badfn=badfn)
    if len(candidates) == 1:
        return candidates[0]
    return unionmatcher(candidates)
121
121
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Patterns given through 'include' and 'exclude' always default to the
    'glob' kind, and the matchers built from them do not receive 'badfn'.

    Usually a patternmatcher is returned:
    >>> match(b'foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(match(b'foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> match(b'foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> match(b'foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> match(b'foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> match(b'foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = match(b'foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
       matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
       of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    if icasefs:
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            '''_donormalize() plus dirstate case normalization.'''
            folded = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind in ('re', 'relre'):
                    # regex can't be normalized
                    folded.append((kind, pat, source))
                    continue

                original = pat
                pat = dsnormalize(original)

                # Preserve the original to handle a case only rename.
                if original != pat and original in dirstate:
                    folded.append((kind, original, source))

                folded.append((kind, pat, source))
            return folded
    else:
        normalize = _donormalize

    if not patterns:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using
        # nevermatcher).
        m = alwaysmatcher(badfn)
    else:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(patternmatcher, root, kindpats,
                                      ctx=ctx, listsubrepos=listsubrepos,
                                      badfn=badfn)

    if include:
        includekindpats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(includematcher, root, includekindpats,
                                   ctx=ctx, listsubrepos=listsubrepos,
                                   badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        excludekindpats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(includematcher, root, excludekindpats,
                                   ctx=ctx, listsubrepos=listsubrepos,
                                   badfn=None)
        m = differencematcher(m, em)
    return m
247
247
def exact(files, badfn=None):
    '''Return an exactmatcher for ``files``, forwarding ``badfn``.'''
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
250
250
def always(badfn=None):
    '''Return an alwaysmatcher, forwarding ``badfn``.'''
    return alwaysmatcher(badfn=badfn)
253
253
def never(badfn=None):
    '''Return a nevermatcher, forwarding ``badfn``.'''
    return nevermatcher(badfn=badfn)
256
256
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose bad() is ``badfn``.

    The matcher passed in is left untouched; only the copy gets the new
    ``bad`` attribute.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
264
264
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of ``(kind, pat, source)`` tuples. ``source`` is '' for
    patterns given directly, the list file name for patterns read through
    'listfile'/'listfile0', and the pattern file for patterns read through
    'include' (or the nested source when one is reported).

    Raises error.Abort when a list file cannot be read, or when processing
    an include file aborts (the message is prefixed with the include path).
    An include file that fails with IOError is skipped, with a warning if
    ``warn`` is set.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                # Patterns from a list file report that file as their source.
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Exceptions are not subscriptable on Python 3, so read the
                # message through .args rather than inst[0].
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, stringutil.forcebytestr(inst.strerror)))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
305
305
class basematcher(object):
    '''Common base for the matcher classes.

    The defaults defined here describe a matcher with no files that matches
    nothing (matchfn() returns False) but still asks for every directory to
    be visited; subclasses override whatever differs.
    '''

    def __init__(self, badfn=None):
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        '''Calling the matcher is shorthand for matchfn(fn).'''
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        # Set view of _files, used for the membership test in exact().
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        '''Base implementation: no file matches.'''
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return 'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
424
424
class alwaysmatcher(basematcher):
    '''Matches everything.

    Every file matches and every directory is reported as 'all' by both
    visitdir() and visitchildrenset().
    '''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        return 'all'

    def visitchildrenset(self, dir):
        return 'all'

    def __repr__(self):
        return r'<alwaysmatcher>'
445
445
class nevermatcher(basematcher):
    '''Matches nothing.

    No directory needs visiting: visitdir() returns False and
    visitchildrenset() returns an empty set.
    '''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'
471
471
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function

    ``predfn`` is installed as the instance's matchfn. ``predrepr``, when
    given, is used to build the repr; otherwise the repr of the predicate
    function itself is shown.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        desc = stringutil.buildrepr(self._predrepr)
        if not desc:
            # No usable predrepr: fall back to the predicate's own repr.
            desc = pycompat.byterepr(self.matchfn)
        return '<predicatenmatcher pred=%s>' % desc
485
485
def normalizerootdir(dir, funcname):
    """Translate the legacy root directory spelling '.' into ''.

    Any other value is returned untouched. When '.' is seen, a deprecation
    warning naming ``match.<funcname>()`` is emitted before returning ''.
    """
    if dir != '.':
        return dir
    util.nouideprecwarn("match.%s() no longer accepts "
                        "'.', use '' instead." % funcname, '5.1')
    return ''
492
492
493
493
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        # Patterns are anchored with '$' so they must match the whole path.
        pats, matchfn = _buildmatch(kindpats, '$', root)
        self._pats = pats
        self.matchfn = matchfn

    @propertycache
    def _dirs(self):
        # Directories derived from the explicit file set via util.dirs().
        return set(util.dirs(self._fileset))

    def visitdir(self, dir):
        """Return 'all', True or False for whether ``dir`` should be visited.

        'all' is only returned for a prefix matcher whose file set names
        ``dir`` itself.
        """
        dir = normalizerootdir(dir, 'visitdir')
        fileset = self._fileset
        if dir in fileset:
            return 'all' if self._prefix else True
        if dir in self._dirs:
            return True
        # Finally, visit when any ancestor of dir is in the file set.
        return any(parent in fileset for parent in util.finddirs(dir))

    def visitchildrenset(self, dir):
        """Translate the visitdir() answer into visitchildrenset() terms."""
        visit = self.visitdir(dir)
        if visit is True:
            return 'this'
        if not visit:
            return set()
        assert visit == 'all'
        return 'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
562
562
563 # This is basically a reimplementation of util.dirs that stores the children
563 # This is basically a reimplementation of util.dirs that stores the children
564 # instead of just a count of them, plus a small optional optimization to avoid
564 # instead of just a count of them, plus a small optional optimization to avoid
565 # some directories we don't need.
565 # some directories we don't need.
566 class _dirchildren(object):
566 class _dirchildren(object):
567 def __init__(self, paths, onlyinclude=None):
567 def __init__(self, paths, onlyinclude=None):
568 self._dirs = {}
568 self._dirs = {}
569 self._onlyinclude = onlyinclude or []
569 self._onlyinclude = onlyinclude or []
570 addpath = self.addpath
570 addpath = self.addpath
571 for f in paths:
571 for f in paths:
572 addpath(f)
572 addpath(f)
573
573
574 def addpath(self, path):
574 def addpath(self, path):
575 if path == '':
575 if path == '':
576 return
576 return
577 dirs = self._dirs
577 dirs = self._dirs
578 findsplitdirs = _dirchildren._findsplitdirs
578 findsplitdirs = _dirchildren._findsplitdirs
579 for d, b in findsplitdirs(path):
579 for d, b in findsplitdirs(path):
580 if d not in self._onlyinclude:
580 if d not in self._onlyinclude:
581 continue
581 continue
582 dirs.setdefault(d, set()).add(b)
582 dirs.setdefault(d, set()).add(b)
583
583
584 @staticmethod
584 @staticmethod
585 def _findsplitdirs(path):
585 def _findsplitdirs(path):
586 # yields (dirname, basename) tuples, walking back to the root. This is
586 # yields (dirname, basename) tuples, walking back to the root. This is
587 # very similar to util.finddirs, except:
587 # very similar to util.finddirs, except:
588 # - produces a (dirname, basename) tuple, not just 'dirname'
588 # - produces a (dirname, basename) tuple, not just 'dirname'
589 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
589 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
590 # slash.
590 # slash.
591 oldpos = len(path)
591 oldpos = len(path)
592 pos = path.rfind('/')
592 pos = path.rfind('/')
593 while pos != -1:
593 while pos != -1:
594 yield path[:pos], path[pos + 1:oldpos]
594 yield path[:pos], path[pos + 1:oldpos]
595 oldpos = pos
595 oldpos = pos
596 pos = path.rfind('/', 0, pos)
596 pos = path.rfind('/', 0, pos)
597 yield '', path[:oldpos]
597 yield '', path[:oldpos]
598
598
599 def get(self, path):
599 def get(self, path):
600 return self._dirs.get(path, set())
600 return self._dirs.get(path, set())
601
601
class includematcher(basematcher):
    """Matcher built from a set of include patterns.

    Patterns are compiled with a '(?:/|$)' suffix, so a pattern matches
    either the path itself or anything below it.
    """

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        """Return 'all', True or False for whether ``dir`` should be visited.

        'all' is returned only for a prefix matcher when ``dir`` is one of
        the recursively included roots.
        """
        dir = normalizerootdir(dir, 'visitdir')
        if self._prefix and dir in self._roots:
            return 'all'
        return (dir in self._roots or
                dir in self._dirs or
                dir in self._parents or
                any(parentdir in self._roots
                    for parentdir in util.finddirs(dir)))

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        return _dirchildren(
            itertools.chain(self._dirs, self._roots, self._parents),
            onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        """Return 'all', 'this', or the set of children of ``dir`` to visit.

        An empty set means nothing below ``dir`` needs visiting.
        """
        # Accept the legacy '.' root spelling the same way visitdir() does,
        # so both entry points agree on what the root directory is.
        dir = normalizerootdir(dir, 'visitchildrenset')
        if self._prefix and dir in self._roots:
            return 'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if ('' in self._roots or
            dir in self._roots or
            dir in self._dirs or
            any(parentdir in self._roots
                for parentdir in util.finddirs(dir))):
            return 'this'

        if dir in self._parents:
            # dir is only a stepping stone: report just the children that
            # lead towards an included directory.
            return self._allparentschildren.get(dir) or set()
        return set()

    @encoding.strmethod
    def __repr__(self):
        return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
659
659
class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # A list is stored as given (not copied); any other iterable is
        # materialised into a new list.
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # Directories derived from the file set via util.dirs().
        return set(util.dirs(self._fileset))

    def visitdir(self, dir):
        dir = normalizerootdir(dir, 'visitdir')
        return dir in self._dirs

    def visitchildrenset(self, dir):
        """Return the names directly below ``dir`` that need to be visited.

        An empty set is returned when nothing below ``dir`` can match.
        """
        dir = normalizerootdir(dir, 'visitchildrenset')

        if not self._fileset or dir not in self._dirs:
            return set()

        # Every file plus every non-root directory is a potential child.
        candidates = self._fileset | (self._dirs - {''})
        if dir != '':
            # Keep only entries below dir, expressed relative to it.
            below = dir + '/'
            skip = len(below)
            candidates = set(c[skip:] for c in candidates
                             if c.startswith(below))
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. A candidate that still contains a '/' therefore
        # belongs to a subdir-of-a-subdir and can be dropped; the immediate
        # subdir is present on its own without a slash.
        ret = {c for c in candidates if '/' not in c}
        # ret is not expected to be empty: that would mean _dirs holds a
        # directory for which _fileset has no file.
        assert ret
        return ret

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
727
727
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.
    '''
    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        """Return 'all', True or False for whether ``dir`` should be visited.
        """
        # Ask m2 once and reuse the answer for both checks below.
        m2visit = self._m2.visitdir(dir)
        if m2visit == 'all':
            # m2 matches everything under dir, so the difference is empty.
            return False
        if not m2visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        # m2 may exclude part of dir: never promise 'all', only True/False.
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        """Return 'all', 'this', or the set of children of ``dir`` to visit.
        """
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == 'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        #
        # Otherwise never return 'all' if m2_set is any kind of non-empty
        # (either 'this' or set(foo)), since m2 might return set() for a
        # subdirectory.
        if m2_set and m1_set in ('all', 'this'):
            return 'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
796
796
def intersectmatchers(m1, m2):
    '''Build a matcher that matches only what both m1 and m2 match.

    If either argument is None, ``m1 or m2`` is returned. If one of the
    matchers always matches, a shallow copy of the other one is returned
    instead of a composite; in every case the non-matching-attributes (bad,
    explicitdir, traversedir) of the result come from m1, and those of m2
    are ignored.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        # m2 alone decides the result, but it must report through m1's hooks.
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        combined = copy.copy(m2)
        combined.bad = m1.bad
        combined.explicitdir = m1.explicitdir
        combined.traversedir = m1.traversedir
        return combined

    if m2.always():
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
817
817
class intersectionmatcher(basematcher):
    """Matcher that matches a file only when both m1 and m2 match it.

    The non-matching-attributes (bad, explicitdir, traversedir) are taken
    from m1.
    """

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()
        # Enumerate the files of an exact side (m1 when both are exact) and
        # keep those the other side accepts.
        if self._m1.isexact():
            exact, other = self._m1, self._m2
        else:
            exact, other = self._m2, self._m1
        return [f for f in exact.files() if other(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if first == 'all':
            # m1 places no restriction below dir; m2's answer is the answer.
            return self._m2.visitdir(dir)
        if not first:
            return False
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side defers entirely to the other side.
        if m1_set == 'all':
            return m2_set
        if m2_set == 'all':
            return m1_set

        if 'this' in (m1_set, m2_set):
            return 'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set & m2_set

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
878
878
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files that live below `path`, and
        # express them relative to it.
        below = path + "/"
        skip = len(path) + 1
        self._files = [f[skip:] for f in matcher._files
                       if f.startswith(below)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = path in matcher._files

    def bad(self, f, msg):
        # Report through the wrapped matcher, using the full path.
        self._matcher.bad(self._path + "/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        dir = normalizerootdir(dir, 'visitdir')
        # The root of this matcher is `path` itself in the wrapped matcher.
        target = self._path if dir == '' else self._path + "/" + dir
        return self._matcher.visitdir(target)

    def visitchildrenset(self, dir):
        dir = normalizerootdir(dir, 'visitchildrenset')
        target = self._path if dir == '' else self._path + "/" + dir
        return self._matcher.visitchildrenset(target)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
956
956
957 class prefixdirmatcher(basematcher):
957 class prefixdirmatcher(basematcher):
958 """Adapt a matcher to work on a parent directory.
958 """Adapt a matcher to work on a parent directory.
959
959
960 The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
960 The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
961 ignored.
961 ignored.
962
962
963 The prefix path should usually be the relative path from the root of
963 The prefix path should usually be the relative path from the root of
964 this matcher to the root of the wrapped matcher.
964 this matcher to the root of the wrapped matcher.
965
965
966 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
966 >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
967 >>> m2 = prefixdirmatcher(b'd/e', m1)
967 >>> m2 = prefixdirmatcher(b'd/e', m1)
968 >>> m2(b'a.txt')
968 >>> m2(b'a.txt')
969 False
969 False
970 >>> m2(b'd/e/a.txt')
970 >>> m2(b'd/e/a.txt')
971 True
971 True
972 >>> m2(b'd/e/b.txt')
972 >>> m2(b'd/e/b.txt')
973 False
973 False
974 >>> m2.files()
974 >>> m2.files()
975 ['d/e/a.txt', 'd/e/f/b.txt']
975 ['d/e/a.txt', 'd/e/f/b.txt']
976 >>> m2.exact(b'd/e/a.txt')
976 >>> m2.exact(b'd/e/a.txt')
977 True
977 True
978 >>> m2.visitdir(b'd')
978 >>> m2.visitdir(b'd')
979 True
979 True
980 >>> m2.visitdir(b'd/e')
980 >>> m2.visitdir(b'd/e')
981 True
981 True
982 >>> m2.visitdir(b'd/e/f')
982 >>> m2.visitdir(b'd/e/f')
983 True
983 True
984 >>> m2.visitdir(b'd/e/g')
984 >>> m2.visitdir(b'd/e/g')
985 False
985 False
986 >>> m2.visitdir(b'd/ef')
986 >>> m2.visitdir(b'd/ef')
987 False
987 False
988 """
988 """
989
989
990 def __init__(self, path, matcher, badfn=None):
990 def __init__(self, path, matcher, badfn=None):
991 super(prefixdirmatcher, self).__init__(badfn)
991 super(prefixdirmatcher, self).__init__(badfn)
992 if not path:
992 if not path:
993 raise error.ProgrammingError('prefix path must not be empty')
993 raise error.ProgrammingError('prefix path must not be empty')
994 self._path = path
994 self._path = path
995 self._pathprefix = path + '/'
995 self._pathprefix = path + '/'
996 self._matcher = matcher
996 self._matcher = matcher
997
997
@propertycache
def _files(self):
    """The wrapped matcher's files, each prepended with the prefix."""
    prefix = self._pathprefix
    return [prefix + name for name in self._matcher._files]
1001
1001
def matchfn(self, f):
    """Match ``f`` against the wrapped matcher, relative to the prefix.

    Names that do not start with "<path>/" never match.
    """
    prefix = self._pathprefix
    if f.startswith(prefix):
        return self._matcher.matchfn(f[len(prefix):])
    return False
1006
1006
@propertycache
def _pathdirs(self):
    """Set built from ``util.finddirs`` applied to the prefix path."""
    found = util.finddirs(self._path)
    return set(found)
1010
1010
def visitdir(self, dir):
    """Decide whether ``dir`` should be visited.

    The prefix directory itself and anything below it are delegated to
    the wrapped matcher (with the prefix removed); any other directory is
    visited only if it is one of the entries of ``self._pathdirs``.
    """
    if dir == self._path:
        inner = ''
    elif dir.startswith(self._pathprefix):
        inner = dir[len(self._pathprefix):]
    else:
        return dir in self._pathdirs
    return self._matcher.visitdir(inner)
1017
1017
def visitchildrenset(self, dir):
    """Return the wrapped matcher's children set for ``dir``.

    The prefix directory and its descendants are delegated to the wrapped
    matcher (with the prefix removed). A directory listed in
    ``self._pathdirs`` yields 'this'; anything else yields an empty set.
    """
    if dir == self._path:
        inner = ''
    elif dir.startswith(self._pathprefix):
        inner = dir[len(self._pathprefix):]
    elif dir in self._pathdirs:
        return 'this'
    else:
        return set()
    return self._matcher.visitchildrenset(inner)
1026
1026
def isexact(self):
    """Report whatever the wrapped matcher's isexact() reports."""
    wrapped = self._matcher
    return wrapped.isexact()
1029
1029
def prefix(self):
    """Report whatever the wrapped matcher's prefix() reports."""
    wrapped = self._matcher
    return wrapped.prefix()
1032
1032
@encoding.strmethod
def __repr__(self):
    """Debug representation showing the prefix path and wrapped matcher."""
    path = pycompat.bytestr(self._path)
    return '<prefixdirmatcher path=%r, matcher=%r>' % (path, self._matcher)
1037
1037
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (bad, explicitdir, traversedir) are taken from
    the first matcher.
    """

    def __init__(self, matchers):
        """Build the union of ``matchers`` (a non-empty sequence)."""
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.explicitdir = first.explicitdir
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        """Return True as soon as any of the matchers accepts ``f``."""
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        """Combine the visitdir() answers of all matchers.

        'all' from any matcher is returned immediately; otherwise the
        individual answers are or-ed together.
        """
        combined = False
        for m in self._matchers:
            answer = m.visitdir(dir)
            if answer == 'all':
                return answer
            combined |= answer
        return combined

    def visitchildrenset(self, dir):
        """Combine the visitchildrenset() answers of all matchers.

        'all' wins over everything, then 'this'; otherwise the result is
        the union of the sets returned by the individual matchers.
        """
        children = set()
        sawthis = False
        for m in self._matchers:
            answer = m.visitchildrenset(dir)
            if not answer:
                continue
            if answer == 'all':
                return answer
            if sawthis or answer == 'this':
                sawthis = True
                # keep looping: a later matcher may still answer 'all'
                continue
            assert isinstance(answer, set)
            children = children.union(answer)
        return 'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        """Debug representation listing the combined matchers."""
        return ('<unionmatcher matchers=%r>' % self._matchers)
1089
1089
def patkind(pattern, default=None):
    r'''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise ``default`` is returned.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    kind, _pat = _patsplit(pattern, default)
    return kind
1104
1104
def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns ``(kind, pat)`` when the text before the first ':' is one of
    ``allpatternkinds``; otherwise ``(default, pattern)``.
    """
    kind, sep, pat = pattern.partition(':')
    if sep and kind in allpatternkinds:
        return kind, pat
    return default, pattern
1113
1113
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    pos, end = 0, len(pat)
    parts = []
    # number of '{' groups currently open; '}' and ',' are only special
    # while this is non-zero
    depth = 0
    escape = util.stringutil.regexbytesescapemap.get

    def peek():
        # next unread character, or a false value at the end of the pattern
        return pos < end and pat[pos:pos + 1]

    while pos < end:
        c = pat[pos:pos + 1]
        pos += 1
        if c not in '*?[{},\\':
            parts.append(escape(c, c))
        elif c == '*':
            if peek() != '*':
                # single star: anything within one path component
                parts.append('[^/]*')
                continue
            pos += 1
            if peek() == '/':
                # '**/': any number of leading directories, possibly none
                pos += 1
                parts.append('(?:.*/)?')
            else:
                parts.append('.*')
        elif c == '?':
            parts.append('.')
        elif c == '[':
            close = pos
            # a '!' or ']' right after '[' belongs to the class itself
            if close < end and pat[close:close + 1] in '!]':
                close += 1
            while close < end and pat[close:close + 1] != ']':
                close += 1
            if close >= end:
                # unterminated class: treat '[' as a literal
                parts.append('\\[')
            else:
                stuff = pat[pos:close].replace('\\','\\\\')
                pos = close + 1
                if stuff[0:1] == '!':
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    stuff = '\\' + stuff
                parts.append('[%s]' % stuff)
        elif c == '{':
            depth += 1
            parts.append('(?:')
        elif c == '}' and depth:
            parts.append(')')
            depth -= 1
        elif c == ',' and depth:
            parts.append('|')
        elif c == '\\':
            nxt = peek()
            if nxt:
                pos += 1
                parts.append(escape(nxt, nxt))
            else:
                parts.append(escape(c, c))
        else:
            parts.append(escape(c, c))
    return ''.join(parts)
1194
1194
def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a
    regular expression.
    globsuffix is appended to the regexp of globs.

    Raises error.ProgrammingError for a kind that cannot be turned into a
    regular expression.'''

    if rustext is not None:
        try:
            return rustext.filepatterns.build_single_regex(
                kind, pat, globsuffix)
        except rustext.filepatterns.PatternError:
            raise error.ProgrammingError(
                'not a regex pattern: %s:%s' % (kind, pat))

    if kind in ('glob', 'relpath') and not pat:
        return ''
    if kind == 're':
        return pat
    if kind in ('path', 'relpath'):
        if pat == '.':
            return ''
        return util.stringutil.reescape(pat) + '(?:/|$)'
    if kind == 'rootfilesin':
        # '.' yields an empty prefix, anything else is a directory name
        if pat == '.':
            dirprefix = ''
        else:
            dirprefix = util.stringutil.reescape(pat) + '/'
        # Anything after the pattern must be a non-directory.
        return dirprefix + '[^/]+$'
    if kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix
    if kind == 'relre':
        # an anchored expression is used as is, others may match anywhere
        return pat if pat.startswith('^') else '.*' + pat
    if kind in ('glob', 'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1237
1237
def _buildmatch(kindpats, globsuffix, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    The returned regexp string stays empty when only subincludes remain
    after expansion.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # sub-matchers are built lazily, the first time their prefix is hit
        submatchers = {}
        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                sub = submatchers.get(prefix)
                if sub is None:
                    sub = match(*matcherargs)
                    submatchers[prefix] = sub

                if sub(f[len(prefix):]):
                    return True
            return False
        matchfuncs.append(matchsubinclude)

    regex = ''
    if kindpats:
        if all(k == 'rootfilesin' for k, p, s in kindpats):
            # only directory-membership patterns: a set lookup on the
            # file's directory replaces the regular expression
            dirs = {p for k, p, s in kindpats}
            def mf(f):
                head, sep, _tail = f.rpartition('/')
                return (head if sep else '.') in dirs
            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
1280
1280
# Upper bound, in bytes, for a single regular expression handed to the
# regexp compiler by _buildregexmatch; longer alternations are split up.
MAX_RE_SIZE = 20000
1282
1282
1283 def _joinregexes(regexps):
1283 def _joinregexes(regexps):
1284 """gather multiple regular expressions into a single one"""
1284 """gather multiple regular expressions into a single one"""
1285 return '|'.join(regexps)
1285 return '|'.join(regexps)
1286
1286
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    The returned regexp string is always the full alternation; the
    matching itself is split over several compiled expressions when the
    alternation would exceed MAX_RE_SIZE.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        chunks = []
        chunkstart = 0
        chunksize = 0
        for idx, piece in enumerate(regexps):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                msg = _("matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            if (chunksize + piecesize) > MAX_RE_SIZE:
                # close the current chunk just before this piece
                chunks.append(_joinregexes(regexps[chunkstart:idx]))
                chunkstart = idx
                chunksize = 0
            # + 1 accounts for the '|' separator
            chunksize += piecesize + 1

        if chunkstart == 0:
            # everything fits in a single expression
            matcher = _rematcher(fullregexp)
            func = lambda s: bool(matcher(s))
        else:
            chunks.append(_joinregexes(regexps[chunkstart:]))
            allmatchers = [_rematcher(g) for g in chunks]
            func = lambda s: any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # recompile the patterns one by one to name the offending one
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if not s:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
                raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                  (s, k, p))
        raise error.Abort(_("invalid pattern"))
1338
1338
1339 def _patternrootsanddirs(kindpats):
1339 def _patternrootsanddirs(kindpats):
1340 '''Returns roots and directories corresponding to each pattern.
1340 '''Returns roots and directories corresponding to each pattern.
1341
1341
1342 This calculates the roots and directories exactly matching the patterns and
1342 This calculates the roots and directories exactly matching the patterns and
1343 returns a tuple of (roots, dirs) for each. It does not return other
1343 returns a tuple of (roots, dirs) for each. It does not return other
1344 directories which may also need to be considered, like the parent
1344 directories which may also need to be considered, like the parent
1345 directories.
1345 directories.
1346 '''
1346 '''
1347 r = []
1347 r = []
1348 d = []
1348 d = []
1349 for kind, pat, source in kindpats:
1349 for kind, pat, source in kindpats:
1350 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1350 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1351 root = []
1351 root = []
1352 for p in pat.split('/'):
1352 for p in pat.split('/'):
1353 if '[' in p or '{' in p or '*' in p or '?' in p:
1353 if '[' in p or '{' in p or '*' in p or '?' in p:
1354 break
1354 break
1355 root.append(p)
1355 root.append(p)
1356 r.append('/'.join(root))
1356 r.append('/'.join(root))
1357 elif kind in ('relpath', 'path'):
1357 elif kind in ('relpath', 'path'):
1358 if pat == '.':
1358 if pat == '.':
1359 pat = ''
1359 pat = ''
1360 r.append(pat)
1360 r.append(pat)
1361 elif kind in ('rootfilesin',):
1361 elif kind in ('rootfilesin',):
1362 if pat == '.':
1362 if pat == '.':
1363 pat = ''
1363 pat = ''
1364 d.append(pat)
1364 d.append(pat)
1365 else: # relglob, re, relre
1365 else: # relglob, re, relre
1366 r.append('')
1366 r.append('')
1367 return r, d
1367 return r, d
1368
1368
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.

    This is the first element of what _patternrootsanddirs() computes.
    '''
    return _patternrootsanddirs(kindpats)[0]
1373
1373
def _rootsdirsandparents(kindpats):
    '''Returns roots and exact directories from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents), where `parents` is a set.

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = set(util.dirs(exactdirs))
    parents.update(util.dirs(roots))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, exactdirs, parents
1412
1416
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # 'rootfilesin' can only name directories, so it is dropped; every other
    # kind may name a file.
    nondirpats = [kp for kp in kindpats if kp[0] != 'rootfilesin']
    return _roots(nondirpats)
1425
1429
1426 def _prefix(kindpats):
1430 def _prefix(kindpats):
1427 '''Whether all the patterns match a prefix (i.e. recursively)'''
1431 '''Whether all the patterns match a prefix (i.e. recursively)'''
1428 for kind, pat, source in kindpats:
1432 for kind, pat, source in kindpats:
1429 if kind not in ('path', 'relpath'):
1433 if kind not in ('path', 'relpath'):
1430 return False
1434 return False
1431 return True
1435 return True
1432
1436
1433 _commentre = None
1437 _commentre = None
1434
1438
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    `warn`, when true, is called with a message for every unknown
    `syntax:` name; such lines are otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline).
    This is useful to debug ignore patterns.
    In the pure-Python code path the third element is the line after
    comment removal, right-stripping and removal of the syntax prefix.
    '''
    global _commentre

    if rustext is not None:
        result, warnings = rustext.filepatterns.read_pattern_file(
            filepath,
            bool(warn),
            sourceinfo,
        )

        for warning_params in warnings:
            # Can't be easily emitted from Rust, because it would require
            # a mechanism for both gettext and calling the `warn` function.
            warn(_("%s: ignoring invalid syntax '%s'\n") % warning_params)

        return result

    syntaxes = {
        're': 'relre:',
        'regexp': 'relre:',
        'glob': 'relglob:',
        'rootglob': 'rootglob:',
        'include': 'include',
        'subinclude': 'subinclude',
    }
    syntax = 'relre:'
    patterns = []

    # the context manager closes the file even when parsing or warn() raises
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                # a "syntax:" line only switches the default for the
                # following lines; it never produces a pattern itself
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a per-line prefix overrides the current default syntax
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns
General Comments 0
You need to be logged in to leave comments. Login now